elastic_trace.hh revision 13429
1/* 2 * Copyright (c) 2013 - 2015 ARM Limited 3 * All rights reserved 4 * 5 * The license below extends only to copyright in the software and shall 6 * not be construed as granting a license to any other intellectual 7 * property including but not limited to intellectual property relating 8 * to a hardware implementation of the functionality of the software 9 * licensed hereunder. You may use the software subject to the license 10 * terms below provided that you ensure that this notice is replicated 11 * unmodified and in its entirety in all distributions of the software, 12 * modified or unmodified, in source code or in binary form. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions are 16 * met: redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer; 18 * redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution; 21 * neither the name of the copyright holders nor the names of its 22 * contributors may be used to endorse or promote products derived from 23 * this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 * 37 * Authors: Radhika Jagtap 38 * Andreas Hansson 39 * Thomas Grass 40 */ 41 42/** 43 * @file This file describes a trace component which is a cpu probe listener 44 * used to generate elastic cpu traces. It registers listeners to probe points 45 * in the fetch, rename, iew and commit stages of the O3CPU. It processes the 46 * dependency graph of the cpu execution and writes out a protobuf trace. It 47 * also generates a protobuf trace of the instruction fetch requests. 48 */ 49 50#ifndef __CPU_O3_PROBE_ELASTIC_TRACE_HH__ 51#define __CPU_O3_PROBE_ELASTIC_TRACE_HH__ 52 53#include <set> 54#include <unordered_map> 55#include <utility> 56 57#include "cpu/o3/dyn_inst.hh" 58#include "cpu/o3/impl.hh" 59#include "mem/request.hh" 60#include "params/ElasticTrace.hh" 61#include "proto/inst_dep_record.pb.h" 62#include "proto/packet.pb.h" 63#include "proto/protoio.hh" 64#include "sim/eventq.hh" 65#include "sim/probe/probe.hh" 66 67/** 68 * The elastic trace is a type of probe listener and listens to probe points 69 * in multiple stages of the O3CPU. The notify method is called on a probe 70 * point typically when an instruction successfully progresses through that 71 * stage. 72 * 73 * As different listener methods mapped to the different probe points execute, 74 * relevant information about the instruction, e.g. timestamps and register 75 * accesses, are captured and stored in temporary data structures. When the 76 * instruction progresses through the commit stage, the timing as well as 77 * dependency information about the instruction is finalised and encapsulated in 78 * a struct called TraceInfo. TraceInfo objects are collected in a list instead 79 * of writing them out to the trace file one a time. This is required as the 80 * trace is processed in chunks to evaluate order dependencies and computational 81 * delay in case an instruction does not have any register dependencies. By this 82 * we achieve a simpler algorithm during replay because every record in the 83 * trace can be hooked onto a record in its past. The trace is written out as 84 * a protobuf format output file. 85 * 86 * The output trace can be read in and played back by the TraceCPU. 87 */ 88class ElasticTrace : public ProbeListenerObject 89{ 90 91 public: 92 typedef typename O3CPUImpl::DynInstPtr DynInstPtr; 93 typedef typename O3CPUImpl::DynInstConstPtr DynInstConstPtr; 94 typedef typename std::pair<InstSeqNum, PhysRegIndex> SeqNumRegPair; 95 96 /** Trace record types corresponding to instruction node types */ 97 typedef ProtoMessage::InstDepRecord::RecordType RecordType; 98 typedef ProtoMessage::InstDepRecord Record; 99 100 /** Constructor */ 101 ElasticTrace(const ElasticTraceParams *params); 102 103 /** 104 * Register the probe listeners that is the methods called on a probe point 105 * notify() call. 106 */ 107 void regProbeListeners(); 108 109 /** Register all listeners. */ 110 void regEtraceListeners(); 111 112 /** Returns the name of the trace probe listener. */ 113 const std::string name() const; 114 115 /** 116 * Process any outstanding trace records, flush them out to the protobuf 117 * output streams and delete the streams at simulation exit. 118 */ 119 void flushTraces(); 120 121 /** 122 * Take the fields of the request class object that are relevant to create 123 * an instruction fetch request. It creates a protobuf message containing 124 * the request fields and writes it to instTraceStream. 125 * 126 * @param req pointer to the fetch request 127 */ 128 void fetchReqTrace(const RequestPtr &req); 129 130 /** 131 * Populate the execute timestamp field in an InstExecInfo object for an 132 * instruction in flight. 133 * 134 * @param dyn_inst pointer to dynamic instruction in flight 135 */ 136 void recordExecTick(const DynInstConstPtr& dyn_inst); 137 138 /** 139 * Populate the timestamp field in an InstExecInfo object for an 140 * instruction in flight when it is execution is complete and it is ready 141 * to commit. 142 * 143 * @param dyn_inst pointer to dynamic instruction in flight 144 */ 145 void recordToCommTick(const DynInstConstPtr& dyn_inst); 146 147 /** 148 * Record a Read After Write physical register dependency if there has 149 * been a write to the source register and update the physical register 150 * map. For this look up the physRegDepMap with this instruction as the 151 * writer of its destination register. If the dependency falls outside the 152 * window it is assumed as already complete. Duplicate entries are avoided. 153 * 154 * @param dyn_inst pointer to dynamic instruction in flight 155 */ 156 void updateRegDep(const DynInstConstPtr& dyn_inst); 157 158 /** 159 * When an instruction gets squashed the destination register mapped to it 160 * is freed up in the rename stage. Remove the register entry from the 161 * physRegDepMap as well to avoid dependencies on squashed instructions. 162 * 163 * @param inst_reg_pair pair of inst. sequence no. and the register 164 */ 165 void removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair); 166 167 /** 168 * Add an instruction that is at the head of the ROB and is squashed only 169 * if it is a load and a request was sent for it. 170 * 171 * @param head_inst pointer to dynamic instruction to be squashed 172 */ 173 void addSquashedInst(const DynInstConstPtr& head_inst); 174 175 /** 176 * Add an instruction that is at the head of the ROB and is committed. 177 * 178 * @param head_inst pointer to dynamic instruction to be committed 179 */ 180 void addCommittedInst(const DynInstConstPtr& head_inst); 181 182 /** Register statistics for the elastic trace. */ 183 void regStats(); 184 185 /** Event to trigger registering this listener for all probe points. */ 186 EventFunctionWrapper regEtraceListenersEvent; 187 188 private: 189 /** 190 * Used for checking the first window for processing and writing of 191 * dependency trace. At the start of the program there can be dependency- 192 * free instructions and such cases are handled differently. 193 */ 194 bool firstWin; 195 196 /** 197 * @defgroup InstExecInfo Struct for storing information before an 198 * instruction reaches the commit stage, e.g. execute timestamp. 199 */ 200 struct InstExecInfo 201 { 202 /** 203 * @ingroup InstExecInfo 204 * @{ 205 */ 206 /** Timestamp when instruction was first processed by execute stage */ 207 Tick executeTick; 208 /** 209 * Timestamp when instruction execution is completed in execute stage 210 * and instruction is marked as ready to commit 211 */ 212 Tick toCommitTick; 213 /** 214 * Set of instruction sequence numbers that this instruction depends on 215 * due to Read After Write data dependency based on physical register. 216 */ 217 std::set<InstSeqNum> physRegDepSet; 218 /** @} */ 219 220 /** Constructor */ 221 InstExecInfo() 222 : executeTick(MaxTick), 223 toCommitTick(MaxTick) 224 { } 225 }; 226 227 /** 228 * Temporary store of InstExecInfo objects. Later on when an instruction 229 * is processed for commit or retire, if it is chosen to be written to 230 * the output trace then this information is looked up using the instruction 231 * sequence number as the key. If it is not chosen then the entry for it in 232 * the store is cleared. 233 */ 234 std::unordered_map<InstSeqNum, InstExecInfo*> tempStore; 235 236 /** 237 * The last cleared instruction sequence number used to free up the memory 238 * allocated in the temporary store. 239 */ 240 InstSeqNum lastClearedSeqNum; 241 242 /** 243 * Map for recording the producer of a physical register to check Read 244 * After Write dependencies. The key is the renamed physical register and 245 * the value is the instruction sequence number of its last producer. 246 */ 247 std::unordered_map<PhysRegIndex, InstSeqNum> physRegDepMap; 248 249 /** 250 * @defgroup TraceInfo Struct for a record in the instruction dependency 251 * trace. All information required to process and calculate the 252 * computational delay is stored in TraceInfo objects. The memory request 253 * fields for a load or store instruction are also included here. Note 254 * that the structure TraceInfo does not store pointers to children 255 * or parents. The dependency trace is maintained as an ordered collection 256 * of records for writing to the output trace and not as a tree data 257 * structure. 258 */ 259 struct TraceInfo 260 { 261 /** 262 * @ingroup TraceInfo 263 * @{ 264 */ 265 /* Instruction sequence number. */ 266 InstSeqNum instNum; 267 /** The type of trace record for the instruction node */ 268 RecordType type; 269 /* Tick when instruction was in execute stage. */ 270 Tick executeTick; 271 /* Tick when instruction was marked ready and sent to commit stage. */ 272 Tick toCommitTick; 273 /* Tick when instruction was committed. */ 274 Tick commitTick; 275 /* If instruction was committed, as against squashed. */ 276 bool commit; 277 /* List of order dependencies. */ 278 std::list<InstSeqNum> robDepList; 279 /* List of physical register RAW dependencies. */ 280 std::list<InstSeqNum> physRegDepList; 281 /** 282 * Computational delay after the last dependent inst. completed. 283 * A value of -1 which means instruction has no dependencies. 284 */ 285 int64_t compDelay; 286 /* Number of dependents. */ 287 uint32_t numDepts; 288 /* The instruction PC for a load, store or non load/store. */ 289 Addr pc; 290 /* Request flags in case of a load/store instruction */ 291 Request::FlagsType reqFlags; 292 /* Request physical address in case of a load/store instruction */ 293 Addr physAddr; 294 /* Request virtual address in case of a load/store instruction */ 295 Addr virtAddr; 296 /* Address space id in case of a load/store instruction */ 297 uint32_t asid; 298 /* Request size in case of a load/store instruction */ 299 unsigned size; 300 /** Default Constructor */ 301 TraceInfo() 302 : type(Record::INVALID) 303 { } 304 /** Is the record a load */ 305 bool isLoad() const { return (type == Record::LOAD); } 306 /** Is the record a store */ 307 bool isStore() const { return (type == Record::STORE); } 308 /** Is the record a fetch triggering an Icache request */ 309 bool isComp() const { return (type == Record::COMP); } 310 /** Return string specifying the type of the node */ 311 const std::string& typeToStr() const; 312 /** @} */ 313 314 /** 315 * Get the execute tick of the instruction. 316 * 317 * @return Tick when instruction was executed 318 */ 319 Tick getExecuteTick() const; 320 }; 321 322 /** 323 * The instruction dependency trace containing TraceInfo objects. The 324 * container implemented is sequential as dependencies obey commit 325 * order (program order). For example, if B is dependent on A then B must 326 * be committed after A. Thus records are updated with dependency 327 * information and written to the trace in commit order. This ensures that 328 * when a graph is reconstructed from the trace during replay, all the 329 * dependencies are stored in the graph before the dependent itself is 330 * added. This facilitates creating a tree data structure during replay, 331 * i.e. adding children as records are read from the trace in an efficient 332 * manner. 333 */ 334 std::vector<TraceInfo*> depTrace; 335 336 /** 337 * Map where the instruction sequence number is mapped to the pointer to 338 * the TraceInfo object. 339 */ 340 std::unordered_map<InstSeqNum, TraceInfo*> traceInfoMap; 341 342 /** Typedef of iterator to the instruction dependency trace. */ 343 typedef typename std::vector<TraceInfo*>::iterator depTraceItr; 344 345 /** Typedef of the reverse iterator to the instruction dependency trace. */ 346 typedef typename std::reverse_iterator<depTraceItr> depTraceRevItr; 347 348 /** 349 * The maximum distance for a dependency and is set by a top level 350 * level parameter. It must be equal to or greater than the number of 351 * entries in the ROB. This variable is used as the length of the sliding 352 * window for processing the dependency trace. 353 */ 354 uint32_t depWindowSize; 355 356 /** Protobuf output stream for data dependency trace */ 357 ProtoOutputStream* dataTraceStream; 358 359 /** Protobuf output stream for instruction fetch trace. */ 360 ProtoOutputStream* instTraceStream; 361 362 /** Number of instructions after which to enable tracing. */ 363 const InstSeqNum startTraceInst; 364 365 /** 366 * Whther the elastic trace listener has been registered for all probes. 367 * 368 * When enabling tracing after a specified number of instructions have 369 * committed, check this to prevent re-registering the listener. 370 */ 371 bool allProbesReg; 372 373 /** Whether to trace virtual addresses for memory requests. */ 374 const bool traceVirtAddr; 375 376 /** Pointer to the O3CPU that is this listener's parent a.k.a. manager */ 377 FullO3CPU<O3CPUImpl>* cpu; 378 379 /** 380 * Add a record to the dependency trace depTrace which is a sequential 381 * container. A record is inserted per committed instruction and in the same 382 * order as the order in which instructions are committed. 383 * 384 * @param head_inst Pointer to the instruction which is head of the 385 * ROB and ready to commit 386 * @param exec_info_ptr Pointer to InstExecInfo for that instruction 387 * @param commit True if instruction is committed, false if squashed 388 */ 389 void addDepTraceRecord(const DynInstConstPtr& head_inst, 390 InstExecInfo* exec_info_ptr, bool commit); 391 392 /** 393 * Clear entries in the temporary store of execution info objects to free 394 * allocated memory until the present instruction being added to the trace. 395 * 396 * @param head_inst pointer to dynamic instruction 397 */ 398 void clearTempStoreUntil(const DynInstConstPtr& head_inst); 399 400 /** 401 * Calculate the computational delay between an instruction and a 402 * subsequent instruction that has an ROB (order) dependency on it 403 * 404 * @param past_record Pointer to instruction 405 * 406 * @param new_record Pointer to subsequent instruction having an ROB 407 * dependency on the instruction pointed to by 408 * past_record 409 */ 410 void compDelayRob(TraceInfo* past_record, TraceInfo* new_record); 411 412 /** 413 * Calculate the computational delay between an instruction and a 414 * subsequent instruction that has a Physical Register (data) dependency on 415 * it. 416 * 417 * @param past_record Pointer to instruction 418 * 419 * @param new_record Pointer to subsequent instruction having a Physical 420 * Register dependency on the instruction pointed to 421 * by past_record 422 */ 423 void compDelayPhysRegDep(TraceInfo* past_record, TraceInfo* new_record); 424 425 /** 426 * Write out given number of records to the trace starting with the first 427 * record in depTrace and iterating through the trace in sequence. A 428 * record is deleted after it is written. 429 * 430 * @param num_to_write Number of records to write to the trace 431 */ 432 void writeDepTrace(uint32_t num_to_write); 433 434 /** 435 * Reverse iterate through the graph, search for a store-after-store or 436 * store-after-load dependency and update the new node's Rob dependency list. 437 * 438 * If a dependency is found, then call the assignRobDep() method that 439 * updates the store with the dependency information. This function is only 440 * called when a new store node is added to the trace. 441 * 442 * @param new_record pointer to new store record 443 * @param find_load_not_store true for searching store-after-load and false 444 * for searching store-after-store dependency 445 */ 446 void updateCommitOrderDep(TraceInfo* new_record, bool find_load_not_store); 447 448 /** 449 * Reverse iterate through the graph, search for an issue order dependency 450 * for a new node and update the new node's Rob dependency list. 451 * 452 * If a dependency is found, call the assignRobDep() method that updates 453 * the node with its dependency information. This function is called in 454 * case a new node to be added to the trace is dependency-free or its 455 * dependency got discarded because the dependency was outside the window. 456 * 457 * @param new_record pointer to new record to be added to the trace 458 */ 459 void updateIssueOrderDep(TraceInfo* new_record); 460 461 /** 462 * The new_record has an order dependency on a past_record, thus update the 463 * new record's Rob dependency list and increment the number of dependents 464 * of the past record. 465 * 466 * @param new_record pointer to new record 467 * @param past_record pointer to record that new_record has a rob 468 * dependency on 469 */ 470 void assignRobDep(TraceInfo* past_record, TraceInfo* new_record); 471 472 /** 473 * Check if past record is a store sent earlier than the execute tick. 474 * 475 * @param past_record pointer to past store 476 * @param execute_tick tick with which to compare past store's commit tick 477 * 478 * @return true if past record is store sent earlier 479 */ 480 bool hasStoreCommitted(TraceInfo* past_record, Tick execute_tick) const; 481 482 /** 483 * Check if past record is a load that completed earlier than the execute 484 * tick. 485 * 486 * @param past_record pointer to past load 487 * @param execute_tick tick with which to compare past load's complete 488 * tick 489 * 490 * @return true if past record is load completed earlier 491 */ 492 bool hasLoadCompleted(TraceInfo* past_record, Tick execute_tick) const; 493 494 /** 495 * Check if past record is a load sent earlier than the execute tick. 496 * 497 * @param past_record pointer to past load 498 * @param execute_tick tick with which to compare past load's send tick 499 * 500 * @return true if past record is load sent earlier 501 */ 502 bool hasLoadBeenSent(TraceInfo* past_record, Tick execute_tick) const; 503 504 /** 505 * Check if past record is a comp node that completed earlier than the 506 * execute tick. 507 * 508 * @param past_record pointer to past comp node 509 * @param execute_tick tick with which to compare past comp node's 510 * completion tick 511 * 512 * @return true if past record is comp completed earlier 513 */ 514 bool hasCompCompleted(TraceInfo* past_record, Tick execute_tick) const; 515 516 /** Number of register dependencies recorded during tracing */ 517 Stats::Scalar numRegDep; 518 519 /** 520 * Number of stores that got assigned a commit order dependency 521 * on a past load/store. 522 */ 523 Stats::Scalar numOrderDepStores; 524 525 /** 526 * Number of load insts that got assigned an issue order dependency 527 * because they were dependency-free. 528 */ 529 Stats::Scalar numIssueOrderDepLoads; 530 531 /** 532 * Number of store insts that got assigned an issue order dependency 533 * because they were dependency-free. 534 */ 535 Stats::Scalar numIssueOrderDepStores; 536 537 /** 538 * Number of non load/store insts that got assigned an issue order 539 * dependency because they were dependency-free. 540 */ 541 Stats::Scalar numIssueOrderDepOther; 542 543 /** Number of filtered nodes */ 544 Stats::Scalar numFilteredNodes; 545 546 /** Maximum number of dependents on any instruction */ 547 Stats::Scalar maxNumDependents; 548 549 /** 550 * Maximum size of the temporary store mostly useful as a check that it is 551 * not growing 552 */ 553 Stats::Scalar maxTempStoreSize; 554 555 /** 556 * Maximum size of the map that holds the last writer to a physical 557 * register. 558 * */ 559 Stats::Scalar maxPhysRegDepMapSize; 560 561}; 562#endif//__CPU_O3_PROBE_ELASTIC_TRACE_HH__ 563