Profiler.cc revision 7056
17585SAli.Saidi@arm.com/*
27585SAli.Saidi@arm.com * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
37585SAli.Saidi@arm.com * All rights reserved.
47585SAli.Saidi@arm.com *
57585SAli.Saidi@arm.com * Redistribution and use in source and binary forms, with or without
67585SAli.Saidi@arm.com * modification, are permitted provided that the following conditions are
77585SAli.Saidi@arm.com * met: redistributions of source code must retain the above copyright
87585SAli.Saidi@arm.com * notice, this list of conditions and the following disclaimer;
97585SAli.Saidi@arm.com * redistributions in binary form must reproduce the above copyright
107585SAli.Saidi@arm.com * notice, this list of conditions and the following disclaimer in the
117585SAli.Saidi@arm.com * documentation and/or other materials provided with the distribution;
127585SAli.Saidi@arm.com * neither the name of the copyright holders nor the names of its
137585SAli.Saidi@arm.com * contributors may be used to endorse or promote products derived from
147585SAli.Saidi@arm.com * this software without specific prior written permission.
157585SAli.Saidi@arm.com *
167585SAli.Saidi@arm.com * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
177585SAli.Saidi@arm.com * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
187585SAli.Saidi@arm.com * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
197585SAli.Saidi@arm.com * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
207585SAli.Saidi@arm.com * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
217585SAli.Saidi@arm.com * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
227585SAli.Saidi@arm.com * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
237585SAli.Saidi@arm.com * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
247585SAli.Saidi@arm.com * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
257585SAli.Saidi@arm.com * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
267585SAli.Saidi@arm.com * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
277585SAli.Saidi@arm.com */
287585SAli.Saidi@arm.com
297585SAli.Saidi@arm.com/*
307585SAli.Saidi@arm.com   This file has been modified by Kevin Moore and Dan Nussbaum of the
317585SAli.Saidi@arm.com   Scalable Systems Research Group at Sun Microsystems Laboratories
327585SAli.Saidi@arm.com   (http://research.sun.com/scalable/) to support the Adaptive
337585SAli.Saidi@arm.com   Transactional Memory Test Platform (ATMTP).
347585SAli.Saidi@arm.com
357585SAli.Saidi@arm.com   Please send email to atmtp-interest@sun.com with feedback, questions, or
367585SAli.Saidi@arm.com   to request future announcements about ATMTP.
377585SAli.Saidi@arm.com
387585SAli.Saidi@arm.com   ----------------------------------------------------------------------
397585SAli.Saidi@arm.com
407585SAli.Saidi@arm.com   File modification date: 2008-02-23
417585SAli.Saidi@arm.com
427585SAli.Saidi@arm.com   ----------------------------------------------------------------------
437585SAli.Saidi@arm.com*/
447585SAli.Saidi@arm.com
457585SAli.Saidi@arm.com// Allows use of times() library call, which determines virtual runtime
#include <sys/resource.h>
#include <sys/times.h>
#include <unistd.h>

#include "base/str.hh"
#include "mem/gems_common/Map.hh"
#include "mem/gems_common/PrioHeap.hh"
#include "mem/protocol/CacheMsg.hh"
#include "mem/protocol/MachineType.hh"
#include "mem/protocol/Protocol.hh"
#include "mem/ruby/common/Debug.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/profiler/AddressProfiler.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/system/System.hh"
617585SAli.Saidi@arm.com
using namespace std;

// Stream used for debug/trace output by profileTransition() and
// rubyWatch(); the object itself is defined elsewhere.
extern ostream *debug_cout_ptr;

// File-local helpers, defined near the end of this file, that report the
// memory footprint of the running process in megabytes.
static double process_memory_resident();
static double process_memory_total();
687585SAli.Saidi@arm.com
697585SAli.Saidi@arm.comProfiler::Profiler(const Params *p)
707585SAli.Saidi@arm.com    : SimObject(p)
717585SAli.Saidi@arm.com{
727585SAli.Saidi@arm.com    m_requestProfileMap_ptr = new Map<string, int>;
737585SAli.Saidi@arm.com
747585SAli.Saidi@arm.com    m_inst_profiler_ptr = NULL;
757585SAli.Saidi@arm.com    m_address_profiler_ptr = NULL;
767585SAli.Saidi@arm.com
777585SAli.Saidi@arm.com    m_real_time_start_time = time(NULL); // Not reset in clearStats()
787585SAli.Saidi@arm.com    m_stats_period = 1000000; // Default
797585SAli.Saidi@arm.com    m_periodic_output_file_ptr = &cerr;
807585SAli.Saidi@arm.com
81    m_hot_lines = p->hot_lines;
82    m_all_instructions = p->all_instructions;
83
84    m_num_of_sequencers = p->num_of_sequencers;
85
86    m_hot_lines = false;
87    m_all_instructions = false;
88
89    m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
90    m_address_profiler_ptr->setHotLines(m_hot_lines);
91    m_address_profiler_ptr->setAllInstructions(m_all_instructions);
92
93    if (m_all_instructions) {
94        m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
95        m_inst_profiler_ptr->setHotLines(m_hot_lines);
96        m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
97    }
98}
99
100Profiler::~Profiler()
101{
102    if (m_periodic_output_file_ptr != &cerr) {
103        delete m_periodic_output_file_ptr;
104    }
105
106    delete m_requestProfileMap_ptr;
107}
108
109void
110Profiler::wakeup()
111{
112    // FIXME - avoid the repeated code
113
114    Vector<integer_t> perProcCycleCount;
115    perProcCycleCount.setSize(m_num_of_sequencers);
116
117    for (int i = 0; i < m_num_of_sequencers; i++) {
118        perProcCycleCount[i] =
119            g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
120        // The +1 allows us to avoid division by zero
121    }
122
123    ostream &out = *m_periodic_output_file_ptr;
124
125    out << "ruby_cycles: " << g_eventQueue_ptr->getTime()-m_ruby_start << endl
126        << "mbytes_resident: " << process_memory_resident() << endl
127        << "mbytes_total: " << process_memory_total() << endl;
128
129    if (process_memory_total() > 0) {
130        out << "resident_ratio: "
131            << process_memory_resident() / process_memory_total() << endl;
132    }
133
134    out << "miss_latency: " << m_allMissLatencyHistogram << endl;
135
136    out << endl;
137
138    if (m_all_instructions) {
139        m_inst_profiler_ptr->printStats(out);
140    }
141
142    //g_system_ptr->getNetwork()->printStats(out);
143    g_eventQueue_ptr->scheduleEvent(this, m_stats_period);
144}
145
146void
147Profiler::setPeriodicStatsFile(const string& filename)
148{
149    cout << "Recording periodic statistics to file '" << filename << "' every "
150         << m_stats_period << " Ruby cycles" << endl;
151
152    if (m_periodic_output_file_ptr != &cerr) {
153        delete m_periodic_output_file_ptr;
154    }
155
156    m_periodic_output_file_ptr = new ofstream(filename.c_str());
157    g_eventQueue_ptr->scheduleEvent(this, 1);
158}
159
160void
161Profiler::setPeriodicStatsInterval(integer_t period)
162{
163    cout << "Recording periodic statistics every " << m_stats_period
164         << " Ruby cycles" << endl;
165
166    m_stats_period = period;
167    g_eventQueue_ptr->scheduleEvent(this, 1);
168}
169
170void
171Profiler::printConfig(ostream& out) const
172{
173    out << endl;
174    out << "Profiler Configuration" << endl;
175    out << "----------------------" << endl;
176    out << "periodic_stats_period: " << m_stats_period << endl;
177}
178
179void
180Profiler::print(ostream& out) const
181{
182    out << "[Profiler]";
183}
184
185void
186Profiler::printStats(ostream& out, bool short_stats)
187{
188    out << endl;
189    if (short_stats) {
190        out << "SHORT ";
191    }
192    out << "Profiler Stats" << endl;
193    out << "--------------" << endl;
194
195    time_t real_time_current = time(NULL);
196    double seconds = difftime(real_time_current, m_real_time_start_time);
197    double minutes = seconds / 60.0;
198    double hours = minutes / 60.0;
199    double days = hours / 24.0;
200    Time ruby_cycles = g_eventQueue_ptr->getTime()-m_ruby_start;
201
202    if (!short_stats) {
203        out << "Elapsed_time_in_seconds: " << seconds << endl;
204        out << "Elapsed_time_in_minutes: " << minutes << endl;
205        out << "Elapsed_time_in_hours: " << hours << endl;
206        out << "Elapsed_time_in_days: " << days << endl;
207        out << endl;
208    }
209
210    // print the virtual runtimes as well
211    struct tms vtime;
212    times(&vtime);
213    seconds = (vtime.tms_utime + vtime.tms_stime) / 100.0;
214    minutes = seconds / 60.0;
215    hours = minutes / 60.0;
216    days = hours / 24.0;
217    out << "Virtual_time_in_seconds: " << seconds << endl;
218    out << "Virtual_time_in_minutes: " << minutes << endl;
219    out << "Virtual_time_in_hours:   " << hours << endl;
220    out << "Virtual_time_in_days:    " << days << endl;
221    out << endl;
222
223    out << "Ruby_current_time: " << g_eventQueue_ptr->getTime() << endl;
224    out << "Ruby_start_time: " << m_ruby_start << endl;
225    out << "Ruby_cycles: " << ruby_cycles << endl;
226    out << endl;
227
228    if (!short_stats) {
229        out << "mbytes_resident: " << process_memory_resident() << endl;
230        out << "mbytes_total: " << process_memory_total() << endl;
231        if (process_memory_total() > 0) {
232            out << "resident_ratio: "
233                << process_memory_resident()/process_memory_total() << endl;
234        }
235        out << endl;
236    }
237
238    Vector<integer_t> perProcCycleCount;
239    perProcCycleCount.setSize(m_num_of_sequencers);
240
241    for (int i = 0; i < m_num_of_sequencers; i++) {
242        perProcCycleCount[i] =
243            g_system_ptr->getCycleCount(i) - m_cycles_executed_at_start[i] + 1;
244        // The +1 allows us to avoid division by zero
245    }
246
247    out << "ruby_cycles_executed: " << perProcCycleCount << endl;
248
249    out << endl;
250
251    if (!short_stats) {
252        out << "Busy Controller Counts:" << endl;
253        for (int i = 0; i < MachineType_NUM; i++) {
254            int size = MachineType_base_count((MachineType)i);
255            for (int j = 0; j < size; j++) {
256                MachineID machID;
257                machID.type = (MachineType)i;
258                machID.num = j;
259                out << machID << ":" << m_busyControllerCount[i][j] << "  ";
260                if ((j + 1) % 8 == 0) {
261                    out << endl;
262                }
263            }
264            out << endl;
265        }
266        out << endl;
267
268        out << "Busy Bank Count:" << m_busyBankCount << endl;
269        out << endl;
270
271        out << "sequencer_requests_outstanding: "
272            << m_sequencer_requests << endl;
273        out << endl;
274    }
275
276    if (!short_stats) {
277        out << "All Non-Zero Cycle Demand Cache Accesses" << endl;
278        out << "----------------------------------------" << endl;
279        out << "miss_latency: " << m_allMissLatencyHistogram << endl;
280        for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
281            if (m_missLatencyHistograms[i].size() > 0) {
282                out << "miss_latency_" << RubyRequestType(i) << ": "
283                    << m_missLatencyHistograms[i] << endl;
284            }
285        }
286        for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
287            if (m_machLatencyHistograms[i].size() > 0) {
288                out << "miss_latency_" << GenericMachineType(i) << ": "
289                    << m_machLatencyHistograms[i] << endl;
290            }
291        }
292
293        out << endl;
294
295        out << "All Non-Zero Cycle SW Prefetch Requests" << endl;
296        out << "------------------------------------" << endl;
297        out << "prefetch_latency: " << m_allSWPrefetchLatencyHistogram << endl;
298        for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
299            if (m_SWPrefetchLatencyHistograms[i].size() > 0) {
300                out << "prefetch_latency_" << CacheRequestType(i) << ": "
301                    << m_SWPrefetchLatencyHistograms[i] << endl;
302            }
303        }
304        for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
305            if (m_SWPrefetchMachLatencyHistograms[i].size() > 0) {
306                out << "prefetch_latency_" << GenericMachineType(i) << ": "
307                    << m_SWPrefetchMachLatencyHistograms[i] << endl;
308            }
309        }
310        out << "prefetch_latency_L2Miss:"
311            << m_SWPrefetchL2MissLatencyHistogram << endl;
312
313        if (m_all_sharing_histogram.size() > 0) {
314            out << "all_sharing: " << m_all_sharing_histogram << endl;
315            out << "read_sharing: " << m_read_sharing_histogram << endl;
316            out << "write_sharing: " << m_write_sharing_histogram << endl;
317
318            out << "all_sharing_percent: ";
319            m_all_sharing_histogram.printPercent(out);
320            out << endl;
321
322            out << "read_sharing_percent: ";
323            m_read_sharing_histogram.printPercent(out);
324            out << endl;
325
326            out << "write_sharing_percent: ";
327            m_write_sharing_histogram.printPercent(out);
328            out << endl;
329
330            int64 total_miss = m_cache_to_cache +  m_memory_to_cache;
331            out << "all_misses: " << total_miss << endl;
332            out << "cache_to_cache_misses: " << m_cache_to_cache << endl;
333            out << "memory_to_cache_misses: " << m_memory_to_cache << endl;
334            out << "cache_to_cache_percent: "
335                << 100.0 * (double(m_cache_to_cache) / double(total_miss))
336                << endl;
337            out << "memory_to_cache_percent: "
338                << 100.0 * (double(m_memory_to_cache) / double(total_miss))
339                << endl;
340            out << endl;
341        }
342
343        if (m_outstanding_requests.size() > 0) {
344            out << "outstanding_requests: ";
345            m_outstanding_requests.printPercent(out);
346            out << endl;
347            out << endl;
348        }
349    }
350
351    if (!short_stats) {
352        out << "Request vs. RubySystem State Profile" << endl;
353        out << "--------------------------------" << endl;
354        out << endl;
355
356        Vector<string> requestProfileKeys = m_requestProfileMap_ptr->keys();
357        requestProfileKeys.sortVector();
358
359        for (int i = 0; i < requestProfileKeys.size(); i++) {
360            int temp_int =
361                m_requestProfileMap_ptr->lookup(requestProfileKeys[i]);
362            double percent = (100.0 * double(temp_int)) / double(m_requests);
363            vector<string> items;
364            tokenize(items, requestProfileKeys[i], ':');
365            vector<string>::iterator i = items.begin();
366            vector<string>::iterator end = items.end();
367            for (; i != end; ++i)
368                out << setw(10) << *i;
369            out << setw(11) << temp_int;
370            out << setw(14) << percent << endl;
371        }
372        out << endl;
373
374        out << "filter_action: " << m_filter_action_histogram << endl;
375
376        if (!m_all_instructions) {
377            m_address_profiler_ptr->printStats(out);
378        }
379
380        if (m_all_instructions) {
381            m_inst_profiler_ptr->printStats(out);
382        }
383
384        out << endl;
385        out << "Message Delayed Cycles" << endl;
386        out << "----------------------" << endl;
387        out << "Total_delay_cycles: " <<   m_delayedCyclesHistogram << endl;
388        out << "Total_nonPF_delay_cycles: "
389            << m_delayedCyclesNonPFHistogram << endl;
390        for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) {
391            out << "  virtual_network_" << i << "_delay_cycles: "
392                << m_delayedCyclesVCHistograms[i] << endl;
393        }
394
395        printResourceUsage(out);
396    }
397}
398
399void
400Profiler::printResourceUsage(ostream& out) const
401{
402    out << endl;
403    out << "Resource Usage" << endl;
404    out << "--------------" << endl;
405
406    integer_t pagesize = getpagesize(); // page size in bytes
407    out << "page_size: " << pagesize << endl;
408
409    rusage usage;
410    getrusage (RUSAGE_SELF, &usage);
411
412    out << "user_time: " << usage.ru_utime.tv_sec << endl;
413    out << "system_time: " << usage.ru_stime.tv_sec << endl;
414    out << "page_reclaims: " << usage.ru_minflt << endl;
415    out << "page_faults: " << usage.ru_majflt << endl;
416    out << "swaps: " << usage.ru_nswap << endl;
417    out << "block_inputs: " << usage.ru_inblock << endl;
418    out << "block_outputs: " << usage.ru_oublock << endl;
419}
420
421void
422Profiler::clearStats()
423{
424    m_ruby_start = g_eventQueue_ptr->getTime();
425
426    m_cycles_executed_at_start.setSize(m_num_of_sequencers);
427    for (int i = 0; i < m_num_of_sequencers; i++) {
428        if (g_system_ptr == NULL) {
429            m_cycles_executed_at_start[i] = 0;
430        } else {
431            m_cycles_executed_at_start[i] = g_system_ptr->getCycleCount(i);
432        }
433    }
434
435    m_busyControllerCount.setSize(MachineType_NUM); // all machines
436    for (int i = 0; i < MachineType_NUM; i++) {
437        int size = MachineType_base_count((MachineType)i);
438        m_busyControllerCount[i].setSize(size);
439        for (int j = 0; j < size; j++) {
440            m_busyControllerCount[i][j] = 0;
441        }
442    }
443    m_busyBankCount = 0;
444
445    m_delayedCyclesHistogram.clear();
446    m_delayedCyclesNonPFHistogram.clear();
447    int size = RubySystem::getNetwork()->getNumberOfVirtualNetworks();
448    m_delayedCyclesVCHistograms.setSize(size);
449    for (int i = 0; i < size; i++) {
450        m_delayedCyclesVCHistograms[i].clear();
451    }
452
453    m_missLatencyHistograms.setSize(RubyRequestType_NUM);
454    for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
455        m_missLatencyHistograms[i].clear(200);
456    }
457    m_machLatencyHistograms.setSize(GenericMachineType_NUM+1);
458    for (int i = 0; i < m_machLatencyHistograms.size(); i++) {
459        m_machLatencyHistograms[i].clear(200);
460    }
461    m_allMissLatencyHistogram.clear(200);
462
463    m_SWPrefetchLatencyHistograms.setSize(CacheRequestType_NUM);
464    for (int i = 0; i < m_SWPrefetchLatencyHistograms.size(); i++) {
465        m_SWPrefetchLatencyHistograms[i].clear(200);
466    }
467    m_SWPrefetchMachLatencyHistograms.setSize(GenericMachineType_NUM+1);
468    for (int i = 0; i < m_SWPrefetchMachLatencyHistograms.size(); i++) {
469        m_SWPrefetchMachLatencyHistograms[i].clear(200);
470    }
471    m_allSWPrefetchLatencyHistogram.clear(200);
472
473    m_sequencer_requests.clear();
474    m_read_sharing_histogram.clear();
475    m_write_sharing_histogram.clear();
476    m_all_sharing_histogram.clear();
477    m_cache_to_cache = 0;
478    m_memory_to_cache = 0;
479
480    // clear HashMaps
481    m_requestProfileMap_ptr->clear();
482
483    // count requests profiled
484    m_requests = 0;
485
486    m_outstanding_requests.clear();
487    m_outstanding_persistent_requests.clear();
488
489    // Flush the prefetches through the system - used so that there
490    // are no outstanding requests after stats are cleared
491    //g_eventQueue_ptr->triggerAllEvents();
492
493    // update the start time
494    m_ruby_start = g_eventQueue_ptr->getTime();
495}
496
497void
498Profiler::addAddressTraceSample(const CacheMsg& msg, NodeID id)
499{
500    if (msg.getType() != CacheRequestType_IFETCH) {
501        // Note: The following line should be commented out if you
502        // want to use the special profiling that is part of the GS320
503        // protocol
504
505        // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
506        // profiled by the AddressProfiler
507        m_address_profiler_ptr->
508            addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
509                           msg.getType(), msg.getAccessMode(), id, false);
510    }
511}
512
513void
514Profiler::profileSharing(const Address& addr, AccessType type,
515                         NodeID requestor, const Set& sharers,
516                         const Set& owner)
517{
518    Set set_contacted(owner);
519    if (type == AccessType_Write) {
520        set_contacted.addSet(sharers);
521    }
522    set_contacted.remove(requestor);
523    int number_contacted = set_contacted.count();
524
525    if (type == AccessType_Write) {
526        m_write_sharing_histogram.add(number_contacted);
527    } else {
528        m_read_sharing_histogram.add(number_contacted);
529    }
530    m_all_sharing_histogram.add(number_contacted);
531
532    if (number_contacted == 0) {
533        m_memory_to_cache++;
534    } else {
535        m_cache_to_cache++;
536    }
537}
538
539void
540Profiler::profileMsgDelay(int virtualNetwork, int delayCycles)
541{
542    assert(virtualNetwork < m_delayedCyclesVCHistograms.size());
543    m_delayedCyclesHistogram.add(delayCycles);
544    m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles);
545    if (virtualNetwork != 0) {
546        m_delayedCyclesNonPFHistogram.add(delayCycles);
547    }
548}
549
550// profiles original cache requests including PUTs
551void
552Profiler::profileRequest(const string& requestStr)
553{
554    m_requests++;
555
556    if (m_requestProfileMap_ptr->exist(requestStr)) {
557        (m_requestProfileMap_ptr->lookup(requestStr))++;
558    } else {
559        m_requestProfileMap_ptr->add(requestStr, 1);
560    }
561}
562
563void
564Profiler::controllerBusy(MachineID machID)
565{
566    m_busyControllerCount[(int)machID.type][(int)machID.num]++;
567}
568
569void
570Profiler::profilePFWait(Time waitTime)
571{
572    m_prefetchWaitHistogram.add(waitTime);
573}
574
575void
576Profiler::bankBusy()
577{
578    m_busyBankCount++;
579}
580
581// non-zero cycle demand request
582void
583Profiler::missLatency(Time t, RubyRequestType type)
584{
585    m_allMissLatencyHistogram.add(t);
586    m_missLatencyHistograms[type].add(t);
587}
588
589// non-zero cycle prefetch request
590void
591Profiler::swPrefetchLatency(Time t, CacheRequestType type,
592                            GenericMachineType respondingMach)
593{
594    m_allSWPrefetchLatencyHistogram.add(t);
595    m_SWPrefetchLatencyHistograms[type].add(t);
596    m_SWPrefetchMachLatencyHistograms[respondingMach].add(t);
597    if (respondingMach == GenericMachineType_Directory ||
598        respondingMach == GenericMachineType_NUM) {
599        m_SWPrefetchL2MissLatencyHistogram.add(t);
600    }
601}
602
603void
604Profiler::profileTransition(const string& component, NodeID version,
605    Address addr, const string& state, const string& event,
606    const string& next_state, const string& note)
607{
608    const int EVENT_SPACES = 20;
609    const int ID_SPACES = 3;
610    const int TIME_SPACES = 7;
611    const int COMP_SPACES = 10;
612    const int STATE_SPACES = 6;
613
614    if (g_debug_ptr->getDebugTime() <= 0 ||
615        g_eventQueue_ptr->getTime() < g_debug_ptr->getDebugTime())
616        return;
617
618    ostream &out = *debug_cout_ptr;
619    out.flags(ios::right);
620    out << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " ";
621    out << setw(ID_SPACES) << version << " ";
622    out << setw(COMP_SPACES) << component;
623    out << setw(EVENT_SPACES) << event << " ";
624
625    out.flags(ios::right);
626    out << setw(STATE_SPACES) << state;
627    out << ">";
628    out.flags(ios::left);
629    out << setw(STATE_SPACES) << next_state;
630
631    out << " " << addr << " " << note;
632
633    out << endl;
634}
635
// Helper functions

// Reads the first two fields of /proc/self/statm -- the total program size
// and the resident set size, both counted in pages -- into the output
// arguments.  Both are left at 0 when the file cannot be opened or parsed.
static void
read_statm_pages(long &total_pages, long &resident_pages)
{
    total_pages = 0;
    resident_pages = 0;

    std::ifstream proc_file("/proc/self/statm");
    proc_file >> total_pages >> resident_pages;
}

// Converts a page count into megabytes (1024*1024 bytes) using the page
// size reported by the system instead of assuming 4kB pages.
static double
pages_to_mbytes(long pages)
{
    long page_bytes = sysconf(_SC_PAGESIZE);
    if (page_bytes <= 0) {
        page_bytes = 4096; // fall back to the historical assumption
    }
    return double(pages) * double(page_bytes) / (1024.0 * 1024.0);
}

// Returns the total program size of this process in megabytes, or 0 when
// it cannot be determined.
static double
process_memory_total()
{
    long total_pages = 0;
    long resident_pages = 0;
    read_statm_pages(total_pages, resident_pages);
    return pages_to_mbytes(total_pages);
}

// Returns the resident set size of this process in megabytes, or 0 when
// it cannot be determined.
static double
process_memory_resident()
{
    long total_pages = 0;
    long resident_pages = 0;
    read_statm_pages(total_pages, resident_pages);
    return pages_to_mbytes(resident_pages);
}
664
665void
666Profiler::rubyWatch(int id)
667{
668    uint64 tr = 0;
669    Address watch_address = Address(tr);
670    const int ID_SPACES = 3;
671    const int TIME_SPACES = 7;
672
673    ostream &out = *debug_cout_ptr;
674
675    out.flags(ios::right);
676    out << setw(TIME_SPACES) << g_eventQueue_ptr->getTime() << " ";
677    out << setw(ID_SPACES) << id << " "
678        << "RUBY WATCH " << watch_address << endl;
679
680    if (!m_watch_address_list_ptr->exist(watch_address)) {
681        m_watch_address_list_ptr->add(watch_address, 1);
682    }
683}
684
685bool
686Profiler::watchAddress(Address addr)
687{
688    if (m_watch_address_list_ptr->exist(addr))
689        return true;
690    else
691        return false;
692}
693
694Profiler *
695RubyProfilerParams::create()
696{
697    return new Profiler(this);
698}
699