Profiler.cc (9923:cdd51a15e9be) Profiler.cc (10012:ec5a5bfb941d)
1/*
1/*
2 * Copyright (c) 1999-2008 Mark D. Hill and David A. Wood
2 * Copyright (c) 1999-2013 Mark D. Hill and David A. Wood
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the

--- 45 unchanged lines hidden (view full) ---

56#include "mem/ruby/profiler/AddressProfiler.hh"
57#include "mem/ruby/profiler/Profiler.hh"
58#include "mem/ruby/system/Sequencer.hh"
59#include "mem/ruby/system/System.hh"
60
61using namespace std;
62using m5::stl_helpers::operator<<;
63
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the

--- 45 unchanged lines hidden (view full) ---

56#include "mem/ruby/profiler/AddressProfiler.hh"
57#include "mem/ruby/profiler/Profiler.hh"
58#include "mem/ruby/system/Sequencer.hh"
59#include "mem/ruby/system/System.hh"
60
61using namespace std;
62using m5::stl_helpers::operator<<;
63
64Profiler::Profiler(const Params *p)
65 : SimObject(p)
64Profiler::Profiler(const RubySystemParams *p)
65 : m_IncompleteTimes(MachineType_NUM)
66{
66{
67 m_inst_profiler_ptr = NULL;
68 m_address_profiler_ptr = NULL;
69 m_real_time_start_time = time(NULL); // Not reset in clearStats()
70
71 m_hot_lines = p->hot_lines;
72 m_all_instructions = p->all_instructions;
73
67 m_hot_lines = p->hot_lines;
68 m_all_instructions = p->all_instructions;
69
74 m_num_of_sequencers = p->num_of_sequencers;
75
76 m_hot_lines = false;
77 m_all_instructions = false;
78
79 m_address_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
70 m_address_profiler_ptr = new AddressProfiler(p->num_of_sequencers);
80 m_address_profiler_ptr->setHotLines(m_hot_lines);
81 m_address_profiler_ptr->setAllInstructions(m_all_instructions);
82
83 if (m_all_instructions) {
71 m_address_profiler_ptr->setHotLines(m_hot_lines);
72 m_address_profiler_ptr->setAllInstructions(m_all_instructions);
73
74 if (m_all_instructions) {
84 m_inst_profiler_ptr = new AddressProfiler(m_num_of_sequencers);
75 m_inst_profiler_ptr = new AddressProfiler(p->num_of_sequencers);
85 m_inst_profiler_ptr->setHotLines(m_hot_lines);
86 m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
87 }
76 m_inst_profiler_ptr->setHotLines(m_hot_lines);
77 m_inst_profiler_ptr->setAllInstructions(m_all_instructions);
78 }
88
89 p->ruby_system->registerProfiler(this);
90}
91
92Profiler::~Profiler()
93{
94}
95
96void
79}
80
81Profiler::~Profiler()
82{
83}
84
85void
97Profiler::print(ostream& out) const
86Profiler::regStats(const std::string &pName)
98{
87{
99 out << "[Profiler]";
100}
88 if (!m_all_instructions) {
89 m_address_profiler_ptr->regStats(pName);
90 }
101
91
102void
103Profiler::printRequestProfile(ostream &out) const
104{
105 out << "Request vs. RubySystem State Profile" << endl;
106 out << "--------------------------------" << endl;
107 out << endl;
92 if (m_all_instructions) {
93 m_inst_profiler_ptr->regStats(pName);
94 }
108
95
109 map<string, uint64_t> m_requestProfileMap;
110 uint64_t m_requests = 0;
96 delayHistogram
97 .init(10)
98 .name(pName + ".delayHist")
99 .desc("delay histogram for all message")
100 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
111
101
112 for (uint32_t i = 0; i < MachineType_NUM; i++) {
113 for (map<uint32_t, AbstractController*>::iterator it =
114 g_abs_controls[i].begin();
115 it != g_abs_controls[i].end(); ++it) {
102 uint32_t numVNets = Network::getNumberOfVirtualNetworks();
103 for (int i = 0; i < numVNets; i++) {
104 delayVCHistogram.push_back(new Stats::Histogram());
105 delayVCHistogram[i]
106 ->init(10)
107 .name(pName + csprintf(".delayVCHist.vnet_%i", i))
108 .desc(csprintf("delay histogram for vnet_%i", i))
109 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
110 }
116
111
117 AbstractController *ctr = (*it).second;
118 map<string, uint64_t> mp = ctr->getRequestProfileMap();
112 m_outstandReqHist
113 .init(10)
114 .name(pName + ".outstanding_req_hist")
115 .desc("")
116 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
119
117
120 for (map<string, uint64_t>::iterator jt = mp.begin();
121 jt != mp.end(); ++jt) {
118 m_latencyHist
119 .init(10)
120 .name(pName + ".latency_hist")
121 .desc("")
122 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
122
123
123 map<string, uint64_t>::iterator kt =
124 m_requestProfileMap.find((*jt).first);
125 if (kt != m_requestProfileMap.end()) {
126 (*kt).second += (*jt).second;
127 } else {
128 m_requestProfileMap[(*jt).first] = (*jt).second;
129 }
130 }
124 m_hitLatencyHist
125 .init(10)
126 .name(pName + ".hit_latency_hist")
127 .desc("")
128 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
131
129
132 m_requests += ctr->getRequestCount();
133 }
130 m_missLatencyHist
131 .init(10)
132 .name(pName + ".miss_latency_hist")
133 .desc("")
134 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
135
136 for (int i = 0; i < RubyRequestType_NUM; i++) {
137 m_typeLatencyHist.push_back(new Stats::Histogram());
138 m_typeLatencyHist[i]
139 ->init(10)
140 .name(pName + csprintf(".%s.latency_hist",
141 RubyRequestType(i)))
142 .desc("")
143 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
144
145 m_hitTypeLatencyHist.push_back(new Stats::Histogram());
146 m_hitTypeLatencyHist[i]
147 ->init(10)
148 .name(pName + csprintf(".%s.hit_latency_hist",
149 RubyRequestType(i)))
150 .desc("")
151 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
152
153 m_missTypeLatencyHist.push_back(new Stats::Histogram());
154 m_missTypeLatencyHist[i]
155 ->init(10)
156 .name(pName + csprintf(".%s.miss_latency_hist",
157 RubyRequestType(i)))
158 .desc("")
159 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
134 }
135
160 }
161
136 map<string, uint64_t>::const_iterator i = m_requestProfileMap.begin();
137 map<string, uint64_t>::const_iterator end = m_requestProfileMap.end();
138 for (; i != end; ++i) {
139 const string &key = i->first;
140 uint64_t count = i->second;
162 for (int i = 0; i < MachineType_NUM; i++) {
163 m_hitMachLatencyHist.push_back(new Stats::Histogram());
164 m_hitMachLatencyHist[i]
165 ->init(10)
166 .name(pName + csprintf(".%s.hit_mach_latency_hist",
167 MachineType(i)))
168 .desc("")
169 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
141
170
142 double percent = (100.0 * double(count)) / double(m_requests);
143 vector<string> items;
144 tokenize(items, key, ':');
145 vector<string>::iterator j = items.begin();
146 vector<string>::iterator end = items.end();
147 for (; j != end; ++i)
148 out << setw(10) << *j;
149 out << setw(11) << count;
150 out << setw(14) << percent << endl;
171 m_missMachLatencyHist.push_back(new Stats::Histogram());
172 m_missMachLatencyHist[i]
173 ->init(10)
174 .name(pName + csprintf(".%s.miss_mach_latency_hist",
175 MachineType(i)))
176 .desc("")
177 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
178
179 m_IssueToInitialDelayHist.push_back(new Stats::Histogram());
180 m_IssueToInitialDelayHist[i]
181 ->init(10)
182 .name(pName + csprintf(
183 ".%s.miss_latency_hist.issue_to_initial_request",
184 MachineType(i)))
185 .desc("")
186 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
187
188 m_InitialToForwardDelayHist.push_back(new Stats::Histogram());
189 m_InitialToForwardDelayHist[i]
190 ->init(10)
191 .name(pName + csprintf(".%s.miss_latency_hist.initial_to_forward",
192 MachineType(i)))
193 .desc("")
194 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
195
196 m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram());
197 m_ForwardToFirstResponseDelayHist[i]
198 ->init(10)
199 .name(pName + csprintf(
200 ".%s.miss_latency_hist.forward_to_first_response",
201 MachineType(i)))
202 .desc("")
203 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
204
205 m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram());
206 m_FirstResponseToCompletionDelayHist[i]
207 ->init(10)
208 .name(pName + csprintf(
209 ".%s.miss_latency_hist.first_response_to_completion",
210 MachineType(i)))
211 .desc("")
212 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
213
214 m_IncompleteTimes[i]
215 .name(pName + csprintf(".%s.incomplete_times", MachineType(i)))
216 .desc("")
217 .flags(Stats::nozero);
151 }
218 }
152 out << endl;
219
220 for (int i = 0; i < RubyRequestType_NUM; i++) {
221 m_hitTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>());
222 m_missTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>());
223
224 for (int j = 0; j < MachineType_NUM; j++) {
225 m_hitTypeMachLatencyHist[i].push_back(new Stats::Histogram());
226 m_hitTypeMachLatencyHist[i][j]
227 ->init(10)
228 .name(pName + csprintf(".%s.%s.hit_type_mach_latency_hist",
229 RubyRequestType(i), MachineType(j)))
230 .desc("")
231 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
232
233 m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram());
234 m_missTypeMachLatencyHist[i][j]
235 ->init(10)
236 .name(pName + csprintf(".%s.%s.miss_type_mach_latency_hist",
237 RubyRequestType(i), MachineType(j)))
238 .desc("")
239 .flags(Stats::nozero | Stats::pdf | Stats::oneline);
240 }
241 }
153}
154
155void
242}
243
244void
156Profiler::printDelayProfile(ostream &out) const
245Profiler::collateStats()
157{
246{
158 out << "Message Delayed Cycles" << endl;
159 out << "----------------------" << endl;
247 if (!m_all_instructions) {
248 m_address_profiler_ptr->collateStats();
249 }
160
250
161 uint32_t numVNets = Network::getNumberOfVirtualNetworks();
162 Histogram delayHistogram;
163 std::vector<Histogram> delayVCHistogram(numVNets);
251 if (m_all_instructions) {
252 m_inst_profiler_ptr->collateStats();
253 }
164
254
255 uint32_t numVNets = Network::getNumberOfVirtualNetworks();
165 for (uint32_t i = 0; i < MachineType_NUM; i++) {
166 for (map<uint32_t, AbstractController*>::iterator it =
167 g_abs_controls[i].begin();
168 it != g_abs_controls[i].end(); ++it) {
169
170 AbstractController *ctr = (*it).second;
171 delayHistogram.add(ctr->getDelayHist());
172
173 for (uint32_t i = 0; i < numVNets; i++) {
256 for (uint32_t i = 0; i < MachineType_NUM; i++) {
257 for (map<uint32_t, AbstractController*>::iterator it =
258 g_abs_controls[i].begin();
259 it != g_abs_controls[i].end(); ++it) {
260
261 AbstractController *ctr = (*it).second;
262 delayHistogram.add(ctr->getDelayHist());
263
264 for (uint32_t i = 0; i < numVNets; i++) {
174 delayVCHistogram[i].add(ctr->getDelayVCHist(i));
265 delayVCHistogram[i]->add(ctr->getDelayVCHist(i));
175 }
176 }
177 }
178
266 }
267 }
268 }
269
179 out << "Total_delay_cycles: " << delayHistogram << endl;
180
181 for (int i = 0; i < numVNets; i++) {
182 out << " virtual_network_" << i << "_delay_cycles: "
183 << delayVCHistogram[i] << endl;
184 }
185}
186
187void
188Profiler::printOutstandingReqProfile(ostream &out) const
189{
190 Histogram sequencerRequests;
191
192 for (uint32_t i = 0; i < MachineType_NUM; i++) {
193 for (map<uint32_t, AbstractController*>::iterator it =
270 for (uint32_t i = 0; i < MachineType_NUM; i++) {
271 for (map<uint32_t, AbstractController*>::iterator it =
194 g_abs_controls[i].begin();
195 it != g_abs_controls[i].end(); ++it) {
272 g_abs_controls[i].begin();
273 it != g_abs_controls[i].end(); ++it) {
196
197 AbstractController *ctr = (*it).second;
198 Sequencer *seq = ctr->getSequencer();
199 if (seq != NULL) {
274
275 AbstractController *ctr = (*it).second;
276 Sequencer *seq = ctr->getSequencer();
277 if (seq != NULL) {
200 sequencerRequests.add(seq->getOutstandReqHist());
278 m_outstandReqHist.add(seq->getOutstandReqHist());
201 }
202 }
203 }
204
279 }
280 }
281 }
282
205 out << "sequencer_requests_outstanding: "
206 << sequencerRequests << endl;
207}
208
209void
210Profiler::printMissLatencyProfile(ostream &out) const
211{
212 // Collate the miss latencies histograms from all the sequencers
213 Histogram latency_hist;
214 std::vector<Histogram> type_latency_hist(RubyRequestType_NUM);
215
216 Histogram hit_latency_hist;
217 std::vector<Histogram> hit_type_latency_hist(RubyRequestType_NUM);
218
219 std::vector<Histogram> hit_mach_latency_hist(MachineType_NUM);
220 std::vector<std::vector<Histogram> >
221 hit_type_mach_latency_hist(RubyRequestType_NUM,
222 std::vector<Histogram>(MachineType_NUM));
223
224 Histogram miss_latency_hist;
225 std::vector<Histogram> miss_type_latency_hist(RubyRequestType_NUM);
226
227 std::vector<Histogram> miss_mach_latency_hist(MachineType_NUM);
228 std::vector<std::vector<Histogram> >
229 miss_type_mach_latency_hist(RubyRequestType_NUM,
230 std::vector<Histogram>(MachineType_NUM));
231
232 std::vector<Histogram> issue_to_initial_delay_hist(MachineType_NUM);
233 std::vector<Histogram> initial_to_forward_delay_hist(MachineType_NUM);
234 std::vector<Histogram>
235 forward_to_first_response_delay_hist(MachineType_NUM);
236 std::vector<Histogram>
237 first_response_to_completion_delay_hist(MachineType_NUM);
238 std::vector<uint64_t> incomplete_times(MachineType_NUM);
239
240 for (uint32_t i = 0; i < MachineType_NUM; i++) {
241 for (map<uint32_t, AbstractController*>::iterator it =
283 for (uint32_t i = 0; i < MachineType_NUM; i++) {
284 for (map<uint32_t, AbstractController*>::iterator it =
242 g_abs_controls[i].begin();
243 it != g_abs_controls[i].end(); ++it) {
285 g_abs_controls[i].begin();
286 it != g_abs_controls[i].end(); ++it) {
244
245 AbstractController *ctr = (*it).second;
246 Sequencer *seq = ctr->getSequencer();
247 if (seq != NULL) {
248 // add all the latencies
287
288 AbstractController *ctr = (*it).second;
289 Sequencer *seq = ctr->getSequencer();
290 if (seq != NULL) {
291 // add all the latencies
249 latency_hist.add(seq->getLatencyHist());
250 hit_latency_hist.add(seq->getHitLatencyHist());
251 miss_latency_hist.add(seq->getMissLatencyHist());
292 m_latencyHist.add(seq->getLatencyHist());
293 m_hitLatencyHist.add(seq->getHitLatencyHist());
294 m_missLatencyHist.add(seq->getMissLatencyHist());
252
253 // add the per request type latencies
254 for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
295
296 // add the per request type latencies
297 for (uint32_t j = 0; j < RubyRequestType_NUM; ++j) {
255 type_latency_hist[j]
256 .add(seq->getTypeLatencyHist(j));
257 hit_type_latency_hist[j]
258 .add(seq->getHitTypeLatencyHist(j));
259 miss_type_latency_hist[j]
260 .add(seq->getMissTypeLatencyHist(j));
298 m_typeLatencyHist[j]
299 ->add(seq->getTypeLatencyHist(j));
300 m_hitTypeLatencyHist[j]
301 ->add(seq->getHitTypeLatencyHist(j));
302 m_missTypeLatencyHist[j]
303 ->add(seq->getMissTypeLatencyHist(j));
261 }
262
263 // add the per machine type miss latencies
264 for (uint32_t j = 0; j < MachineType_NUM; ++j) {
304 }
305
306 // add the per machine type miss latencies
307 for (uint32_t j = 0; j < MachineType_NUM; ++j) {
265 hit_mach_latency_hist[j]
266 .add(seq->getHitMachLatencyHist(j));
267 miss_mach_latency_hist[j]
268 .add(seq->getMissMachLatencyHist(j));
308 m_hitMachLatencyHist[j]
309 ->add(seq->getHitMachLatencyHist(j));
310 m_missMachLatencyHist[j]
311 ->add(seq->getMissMachLatencyHist(j));
269
312
270 issue_to_initial_delay_hist[j].add(
313 m_IssueToInitialDelayHist[j]->add(
271 seq->getIssueToInitialDelayHist(MachineType(j)));
272
314 seq->getIssueToInitialDelayHist(MachineType(j)));
315
273 initial_to_forward_delay_hist[j].add(
316 m_InitialToForwardDelayHist[j]->add(
274 seq->getInitialToForwardDelayHist(MachineType(j)));
317 seq->getInitialToForwardDelayHist(MachineType(j)));
275 forward_to_first_response_delay_hist[j].add(seq->
318 m_ForwardToFirstResponseDelayHist[j]->add(seq->
276 getForwardRequestToFirstResponseHist(MachineType(j)));
277
319 getForwardRequestToFirstResponseHist(MachineType(j)));
320
278 first_response_to_completion_delay_hist[j].add(seq->
279 getFirstResponseToCompletionDelayHist(MachineType(j)));
280 incomplete_times[j] +=
321 m_FirstResponseToCompletionDelayHist[j]->add(seq->
322 getFirstResponseToCompletionDelayHist(
323 MachineType(j)));
324 m_IncompleteTimes[j] +=
281 seq->getIncompleteTimes(MachineType(j));
282 }
283
284 // add the per (request, machine) type miss latencies
285 for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
286 for (uint32_t k = 0; k < MachineType_NUM; k++) {
325 seq->getIncompleteTimes(MachineType(j));
326 }
327
328 // add the per (request, machine) type miss latencies
329 for (uint32_t j = 0; j < RubyRequestType_NUM; j++) {
330 for (uint32_t k = 0; k < MachineType_NUM; k++) {
287 hit_type_mach_latency_hist[j][k].add(
288 seq->getHitTypeMachLatencyHist(j,k));
289 miss_type_mach_latency_hist[j][k].add(
290 seq->getMissTypeMachLatencyHist(j,k));
331 m_hitTypeMachLatencyHist[j][k]->add(
332 seq->getHitTypeMachLatencyHist(j,k));
333 m_missTypeMachLatencyHist[j][k]->add(
334 seq->getMissTypeMachLatencyHist(j,k));
291 }
292 }
293 }
294 }
295 }
335 }
336 }
337 }
338 }
339 }
296
297 out << "latency: " << latency_hist << endl;
298 for (int i = 0; i < RubyRequestType_NUM; i++) {
299 if (type_latency_hist[i].size() > 0) {
300 out << "latency: " << RubyRequestType(i) << ": "
301 << type_latency_hist[i] << endl;
302 }
303 }
304
305 out << "hit latency: " << hit_latency_hist << endl;
306 for (int i = 0; i < RubyRequestType_NUM; i++) {
307 if (hit_type_latency_hist[i].size() > 0) {
308 out << "hit latency: " << RubyRequestType(i) << ": "
309 << hit_type_latency_hist[i] << endl;
310 }
311 }
312
313 for (int i = 0; i < MachineType_NUM; i++) {
314 if (hit_mach_latency_hist[i].size() > 0) {
315 out << "hit latency: " << MachineType(i) << ": "
316 << hit_mach_latency_hist[i] << endl;
317 }
318 }
319
320 for (int i = 0; i < RubyRequestType_NUM; i++) {
321 for (int j = 0; j < MachineType_NUM; j++) {
322 if (hit_type_mach_latency_hist[i][j].size() > 0) {
323 out << "hit latency: " << RubyRequestType(i)
324 << ": " << MachineType(j) << ": "
325 << hit_type_mach_latency_hist[i][j] << endl;
326 }
327 }
328 }
329
330 out << "miss latency: " << miss_latency_hist << endl;
331 for (int i = 0; i < RubyRequestType_NUM; i++) {
332 if (miss_type_latency_hist[i].size() > 0) {
333 out << "miss latency: " << RubyRequestType(i) << ": "
334 << miss_type_latency_hist[i] << endl;
335 }
336 }
337
338 for (int i = 0; i < MachineType_NUM; i++) {
339 if (miss_mach_latency_hist[i].size() > 0) {
340 out << "miss latency: " << MachineType(i) << ": "
341 << miss_mach_latency_hist[i] << endl;
342
343 out << "miss latency: " << MachineType(i)
344 << "::issue_to_initial_request: "
345 << issue_to_initial_delay_hist[i] << endl;
346 out << "miss latency: " << MachineType(i)
347 << "::initial_to_forward_request: "
348 << initial_to_forward_delay_hist[i] << endl;
349 out << "miss latency: " << MachineType(i)
350 << "::forward_to_first_response: "
351 << forward_to_first_response_delay_hist[i] << endl;
352 out << "miss latency: " << MachineType(i)
353 << "::first_response_to_completion: "
354 << first_response_to_completion_delay_hist[i] << endl;
355 out << "incomplete times: " << incomplete_times[i] << endl;
356 }
357 }
358
359 for (int i = 0; i < RubyRequestType_NUM; i++) {
360 for (int j = 0; j < MachineType_NUM; j++) {
361 if (miss_type_mach_latency_hist[i][j].size() > 0) {
362 out << "miss latency: " << RubyRequestType(i)
363 << ": " << MachineType(j) << ": "
364 << miss_type_mach_latency_hist[i][j] << endl;
365 }
366 }
367 }
368
369 out << endl;
370}
371
372void
340}
341
342void
373Profiler::printStats(ostream& out, bool short_stats)
374{
375 out << endl;
376 if (short_stats) {
377 out << "SHORT ";
378 }
379 out << "Profiler Stats" << endl;
380 out << "--------------" << endl;
381
382 Cycles ruby_cycles = g_system_ptr->curCycle()-m_ruby_start;
383
384 out << "Ruby_current_time: " << g_system_ptr->curCycle() << endl;
385 out << "Ruby_start_time: " << m_ruby_start << endl;
386 out << "Ruby_cycles: " << ruby_cycles << endl;
387 out << endl;
388
389 if (!short_stats) {
390 out << "Busy Controller Counts:" << endl;
391 for (uint32_t i = 0; i < MachineType_NUM; i++) {
392 uint32_t size = MachineType_base_count((MachineType)i);
393
394 for (uint32_t j = 0; j < size; j++) {
395 MachineID machID;
396 machID.type = (MachineType)i;
397 machID.num = j;
398
399 AbstractController *ctr =
400 (*(g_abs_controls[i].find(j))).second;
401 out << machID << ":" << ctr->getFullyBusyCycles() << " ";
402 if ((j + 1) % 8 == 0) {
403 out << endl;
404 }
405 }
406 out << endl;
407 }
408 out << endl;
409
410 out << "Busy Bank Count:" << m_busyBankCount << endl;
411 out << endl;
412
413 printOutstandingReqProfile(out);
414 out << endl;
415 }
416
417 if (!short_stats) {
418 out << "All Non-Zero Cycle Demand Cache Accesses" << endl;
419 out << "----------------------------------------" << endl;
420 printMissLatencyProfile(out);
421
422 if (m_all_sharing_histogram.size() > 0) {
423 out << "all_sharing: " << m_all_sharing_histogram << endl;
424 out << "read_sharing: " << m_read_sharing_histogram << endl;
425 out << "write_sharing: " << m_write_sharing_histogram << endl;
426
427 out << "all_sharing_percent: ";
428 m_all_sharing_histogram.printPercent(out);
429 out << endl;
430
431 out << "read_sharing_percent: ";
432 m_read_sharing_histogram.printPercent(out);
433 out << endl;
434
435 out << "write_sharing_percent: ";
436 m_write_sharing_histogram.printPercent(out);
437 out << endl;
438
439 int64 total_miss = m_cache_to_cache + m_memory_to_cache;
440 out << "all_misses: " << total_miss << endl;
441 out << "cache_to_cache_misses: " << m_cache_to_cache << endl;
442 out << "memory_to_cache_misses: " << m_memory_to_cache << endl;
443 out << "cache_to_cache_percent: "
444 << 100.0 * (double(m_cache_to_cache) / double(total_miss))
445 << endl;
446 out << "memory_to_cache_percent: "
447 << 100.0 * (double(m_memory_to_cache) / double(total_miss))
448 << endl;
449 out << endl;
450 }
451
452 printRequestProfile(out);
453
454 if (!m_all_instructions) {
455 m_address_profiler_ptr->printStats(out);
456 }
457
458 if (m_all_instructions) {
459 m_inst_profiler_ptr->printStats(out);
460 }
461
462 out << endl;
463 printDelayProfile(out);
464 }
465}
466
467void
468Profiler::clearStats()
469{
470 m_ruby_start = g_system_ptr->curCycle();
471 m_real_time_start_time = time(NULL);
472
473 m_busyBankCount = 0;
474 m_read_sharing_histogram.clear();
475 m_write_sharing_histogram.clear();
476 m_all_sharing_histogram.clear();
477 m_cache_to_cache = 0;
478 m_memory_to_cache = 0;
479
480 // update the start time
481 m_ruby_start = g_system_ptr->curCycle();
482}
483
484void
485Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id)
486{
487 if (msg.getType() != RubyRequestType_IFETCH) {
488 // Note: The following line should be commented out if you
489 // want to use the special profiling that is part of the GS320
490 // protocol
491
492 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
493 // profiled by the AddressProfiler
494 m_address_profiler_ptr->
495 addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
496 msg.getType(), msg.getAccessMode(), id, false);
497 }
498}
343Profiler::addAddressTraceSample(const RubyRequest& msg, NodeID id)
344{
345 if (msg.getType() != RubyRequestType_IFETCH) {
346 // Note: The following line should be commented out if you
347 // want to use the special profiling that is part of the GS320
348 // protocol
349
350 // NOTE: Unless PROFILE_HOT_LINES is enabled, nothing will be
351 // profiled by the AddressProfiler
352 m_address_profiler_ptr->
353 addTraceSample(msg.getLineAddress(), msg.getProgramCounter(),
354 msg.getType(), msg.getAccessMode(), id, false);
355 }
356}
499
500void
501Profiler::profileSharing(const Address& addr, AccessType type,
502 NodeID requestor, const Set& sharers,
503 const Set& owner)
504{
505 Set set_contacted(owner);
506 if (type == AccessType_Write) {
507 set_contacted.addSet(sharers);
508 }
509 set_contacted.remove(requestor);
510 int number_contacted = set_contacted.count();
511
512 if (type == AccessType_Write) {
513 m_write_sharing_histogram.add(number_contacted);
514 } else {
515 m_read_sharing_histogram.add(number_contacted);
516 }
517 m_all_sharing_histogram.add(number_contacted);
518
519 if (number_contacted == 0) {
520 m_memory_to_cache++;
521 } else {
522 m_cache_to_cache++;
523 }
524}
525
526void
527Profiler::bankBusy()
528{
529 m_busyBankCount++;
530}
531
532void
533Profiler::rubyWatch(int id)
534{
535 uint64 tr = 0;
536 Address watch_address = Address(tr);
537
538 DPRINTFN("%7s %3s RUBY WATCH %d\n", g_system_ptr->curCycle(), id,
539 watch_address);
540
541 // don't care about success or failure
542 m_watch_address_set.insert(watch_address);
543}
544
545bool
546Profiler::watchAddress(Address addr)
547{
548 return m_watch_address_set.count(addr) > 0;
549}
550
551Profiler *
552RubyProfilerParams::create()
553{
554 return new Profiler(this);
555}