/*
 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Lisa Hsu,
 *          Sooraj Puthoor
 */

/*
 * This file is based on MOESI_AMD_Base.sm
 * Differences from the AMD base protocol:
 * -- Uses a probe filter memory to track sharers.
 * -- The probe filter can be inclusive or non-inclusive.
 * -- Only two sharers are tracked: (a) the GPU and/or (b) the CPU.
 * -- If sharer information is available, only the sharer is probed.
 * -- If sharer information is not available, probes are broadcast.
 */

machine(MachineType:Directory, "AMD Baseline protocol")
: DirectoryMemory * directory;
  CacheMemory * L3CacheMemory;
  CacheMemory * ProbeFilterMemory;
  Cycles response_latency := 5;
  Cycles l3_hit_latency := 50;
  bool noTCCdir := "False";
  bool CAB_TCC := "False";
  int TCC_select_num_bits := 1;
  bool useL3OnWT := "False";
  bool inclusiveDir := "True";
  Cycles to_memory_controller_latency := 1;

  // From the Cores
  MessageBuffer * requestFromCores, network="From", virtual_network="0", ordered="false", vnet_type="request";
  MessageBuffer * responseFromCores, network="From", virtual_network="2", ordered="false", vnet_type="response";
  MessageBuffer * unblockFromCores, network="From", virtual_network="4", ordered="false", vnet_type="unblock";

  MessageBuffer * probeToCore, network="To", virtual_network="0", ordered="false", vnet_type="request";
  MessageBuffer * responseToCore, network="To", virtual_network="2", ordered="false", vnet_type="response";

  MessageBuffer * triggerQueue, ordered="true";
  MessageBuffer * L3triggerQueue, ordered="true";
  MessageBuffer * responseFromMemory;
{
  // STATES
  state_declaration(State, desc="Directory states", default="Directory_State_U") {
    U, AccessPermission:Backing_Store,     desc="unblocked";
    BL, AccessPermission:Busy,             desc="got L3 WB request";
    // BL is Busy because it is busy waiting for the data
    // which is possibly in the network. The cache which evicted the data
    // might have moved to some other state after doing the eviction.
    // BS==> Received a read request; has not requested ownership
    // B==> Received a read request; has requested ownership
    // BM==> Received a modification request
    //
    // Suffix convention: _M = waiting on memory only, _PM = waiting on both
    // probes and memory, _Pm = waiting on probes with memory data already in hand.
    B_P, AccessPermission:Backing_Store,   desc="Back invalidation, waiting for probes";
    BS_M, AccessPermission:Backing_Store,  desc="blocked waiting for memory";
    BM_M, AccessPermission:Backing_Store,  desc="blocked waiting for memory";
    B_M, AccessPermission:Backing_Store,   desc="blocked waiting for memory";
    BP, AccessPermission:Backing_Store,    desc="blocked waiting for probes, no need for memory";
    BS_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and memory";
    BM_PM, AccessPermission:Backing_Store, desc="blocked waiting for probes and memory";
    B_PM, AccessPermission:Backing_Store,  desc="blocked waiting for probes and memory";
    BS_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
    BM_Pm, AccessPermission:Backing_Store, desc="blocked waiting for probes, already got memory";
    B_Pm, AccessPermission:Backing_Store,  desc="blocked waiting for probes, already got memory";
    B, AccessPermission:Backing_Store,     desc="sent response, blocked till ack";
  }

  // Events
  enumeration(Event, desc="Directory events") {
    // CPU requests
    RdBlkS,             desc="...";
    RdBlkM,             desc="...";
    RdBlk,              desc="...";
    CtoD,               desc="...";
    WriteThrough,       desc="WriteThrough Message";
    Atomic,             desc="Atomic Message";

    // writebacks
    VicDirty,           desc="...";
    VicClean,           desc="...";
    CPUData,            desc="WB data from CPU";
    StaleWB,            desc="Notification that WB has been superseded by a probe";

    // probe responses
    CPUPrbResp,         desc="Probe Response Msg";

    ProbeAcksComplete,  desc="Probe Acks Complete";

    L3Hit,              desc="Hit in L3 return data to core";

    // Replacement
    PF_Repl,            desc="Replace address from probe filter";

    // Memory Controller
    MemData,            desc="Fetched data from memory arrives";
    WBAck,              desc="Writeback Ack from memory arrives";

    CoreUnblock,        desc="Core received data, unblock";
    UnblockWriteThrough, desc="Unblock because of writethrough request finishing";

    StaleVicDirty,      desc="Core invalidated before VicDirty processed";
  }

  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
    L3DataArrayRead,    desc="Read the L3 data array";
    L3DataArrayWrite,   desc="Write the L3 data array";
    L3TagArrayRead,     desc="Read the L3 tag array";
    L3TagArrayWrite,    desc="Write the L3 tag array";

    PFTagArrayRead,     desc="Read the probe filter tag array";
    PFTagArrayWrite,    desc="Write the probe filter tag array";
  }

  // TYPES

  enumeration(ProbeFilterState, desc="") {
    T,  desc="Tracked";
    NT, desc="Not tracked";
    B,  desc="Blocked, this entry is being replaced";
  }

  // DirectoryEntry
  structure(Entry, desc="...", interface="AbstractEntry") {
    State DirectoryState,       desc="Directory state";
    DataBlock DataBlk,          desc="data for the block";
    NetDest VicDirtyIgnore,     desc="VicDirty coming from whom to ignore";
  }

  structure(CacheEntry, desc="...", interface="AbstractCacheEntry") {
    DataBlock DataBlk,          desc="data for the block";
    MachineID LastSender,       desc="Mach which this block came from";
    ProbeFilterState pfState,   desc="ProbeFilter state", default="Directory_ProbeFilterState_NT";
    bool isOnCPU,               desc="Block valid in the CPU complex", default="false";
    bool isOnGPU,               desc="Block valid in the GPU complex", default="false";
  }

  structure(TBE, desc="...") {
    State TBEState,                 desc="Transient state";
    DataBlock DataBlk,              desc="data for the block";
    bool Dirty,                     desc="Is the data dirty?";
    int NumPendingAcks,             desc="num acks expected";
    MachineID OriginalRequestor,    desc="Original Requestor";
    MachineID WTRequestor,          desc="WT Requestor";
    bool Cached,                    desc="data hit in Cache";
    bool MemData,                   desc="Got MemData?", default="false";
    bool wtData,                    desc="Got write through data?", default="false";
    bool atomicData,                desc="Got Atomic op?", default="false";
    Cycles InitialRequestTime,      desc="...";
    Cycles ForwardRequestTime,      desc="...";
    Cycles ProbeRequestStartTime,   desc="...";
    MachineID LastSender,           desc="Mach which this block came from";
    bool L3Hit, default="false",    desc="Was this an L3 hit?";
    uint64_t probe_id,              desc="probe id for lifetime profiling";
    WriteMask writeMask,            desc="outstanding write through mask";
    Addr demandAddress,             desc="Address of demand request which caused probe filter eviction";
  }

  structure(TBETable, external="yes") {
    TBE lookup(Addr);
    void allocate(Addr);
    void deallocate(Addr);
    bool isPresent(Addr);
  }

  TBETable TBEs, template="<Directory_TBE>", constructor="m_number_of_TBEs";

  int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";

  Tick clockEdge();
  Tick cyclesToTicks(Cycles c);

  void set_tbe(TBE a);
  void unset_tbe();
  void wakeUpAllBuffers();
  void wakeUpBuffers(Addr a);
  Cycles curCycle();
  MachineID mapAddressToMachine(Addr addr, MachineType mtype);

  Entry getDirectoryEntry(Addr addr), return_by_pointer="yes" {
    Entry dir_entry := static_cast(Entry, "pointer", directory.lookup(addr));

    if (is_valid(dir_entry)) {
      //DPRINTF(RubySlicc, "Getting entry %s:
%s\n", addr, dir_entry.DataBlk); 212 return dir_entry; 213 } 214 215 dir_entry := static_cast(Entry, "pointer", 216 directory.allocate(addr, new Entry)); 217 return dir_entry; 218 } 219 220 DataBlock getDataBlock(Addr addr), return_by_ref="yes" { 221 TBE tbe := TBEs.lookup(addr); 222 if (is_valid(tbe) && tbe.MemData) { 223 DPRINTF(RubySlicc, "Returning DataBlk from TBE %s:%s\n", addr, tbe); 224 return tbe.DataBlk; 225 } 226 DPRINTF(RubySlicc, "Returning DataBlk from Dir %s:%s\n", addr, getDirectoryEntry(addr)); 227 return getDirectoryEntry(addr).DataBlk; 228 } 229 230 State getState(TBE tbe, CacheEntry entry, Addr addr) { 231 CacheEntry probeFilterEntry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(addr)); 232 if (inclusiveDir) { 233 if (is_valid(probeFilterEntry) && probeFilterEntry.pfState == ProbeFilterState:B) { 234 return State:B_P; 235 } 236 } 237 return getDirectoryEntry(addr).DirectoryState; 238 } 239 240 void setState(TBE tbe, CacheEntry entry, Addr addr, State state) { 241 getDirectoryEntry(addr).DirectoryState := state; 242 } 243 244 void functionalRead(Addr addr, Packet *pkt) { 245 TBE tbe := TBEs.lookup(addr); 246 if(is_valid(tbe)) { 247 testAndRead(addr, tbe.DataBlk, pkt); 248 } else { 249 functionalMemoryRead(pkt); 250 } 251 } 252 253 int functionalWrite(Addr addr, Packet *pkt) { 254 int num_functional_writes := 0; 255 256 TBE tbe := TBEs.lookup(addr); 257 if(is_valid(tbe)) { 258 num_functional_writes := num_functional_writes + 259 testAndWrite(addr, tbe.DataBlk, pkt); 260 } 261 262 num_functional_writes := num_functional_writes + 263 functionalMemoryWrite(pkt); 264 return num_functional_writes; 265 } 266 267 AccessPermission getAccessPermission(Addr addr) { 268 // For this Directory, all permissions are just tracked in Directory, since 269 // it's not possible to have something in TBE but not Dir, just keep track 270 // of state all in one place. 
271 if (directory.isPresent(addr)) { 272 return Directory_State_to_permission(getDirectoryEntry(addr).DirectoryState); 273 } 274 275 return AccessPermission:NotPresent; 276 } 277 278 void setAccessPermission(CacheEntry entry, Addr addr, State state) { 279 getDirectoryEntry(addr).changePermission(Directory_State_to_permission(state)); 280 } 281 282 void recordRequestType(RequestType request_type, Addr addr) { 283 if (request_type == RequestType:L3DataArrayRead) { 284 L3CacheMemory.recordRequestType(CacheRequestType:DataArrayRead, addr); 285 } else if (request_type == RequestType:L3DataArrayWrite) { 286 L3CacheMemory.recordRequestType(CacheRequestType:DataArrayWrite, addr); 287 } else if (request_type == RequestType:L3TagArrayRead) { 288 L3CacheMemory.recordRequestType(CacheRequestType:TagArrayRead, addr); 289 } else if (request_type == RequestType:L3TagArrayWrite) { 290 L3CacheMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr); 291 } else if (request_type == RequestType:PFTagArrayRead) { 292 ProbeFilterMemory.recordRequestType(CacheRequestType:TagArrayRead, addr); 293 } else if (request_type == RequestType:PFTagArrayWrite) { 294 ProbeFilterMemory.recordRequestType(CacheRequestType:TagArrayWrite, addr); 295 } 296 } 297 298 bool checkResourceAvailable(RequestType request_type, Addr addr) { 299 if (request_type == RequestType:L3DataArrayRead) { 300 return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr); 301 } else if (request_type == RequestType:L3DataArrayWrite) { 302 return L3CacheMemory.checkResourceAvailable(CacheResourceType:DataArray, addr); 303 } else if (request_type == RequestType:L3TagArrayRead) { 304 return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); 305 } else if (request_type == RequestType:L3TagArrayWrite) { 306 return L3CacheMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); 307 } else if (request_type == RequestType:PFTagArrayRead) { 308 return ProbeFilterMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); 309 } else if (request_type == RequestType:PFTagArrayWrite) { 310 return ProbeFilterMemory.checkResourceAvailable(CacheResourceType:TagArray, addr); 311 } else { 312 error("Invalid RequestType type in checkResourceAvailable"); 313 return true; 314 } 315 } 316 317 bool isNotPresentProbeFilter(Addr address) { 318 if (ProbeFilterMemory.isTagPresent(address) || 319 ProbeFilterMemory.cacheAvail(address)) { 320 return false; 321 } 322 return true; 323 } 324 325 bool isGPUSharer(Addr address) { 326 assert(ProbeFilterMemory.isTagPresent(address)); 327 CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address)); 328 if (entry.pfState == ProbeFilterState:NT) { 329 return true; 330 } else if (entry.isOnGPU){ 331 return true; 332 } 333 return false; 334 } 335 336 bool isCPUSharer(Addr address) { 337 assert(ProbeFilterMemory.isTagPresent(address)); 338 CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address)); 339 if (entry.pfState == ProbeFilterState:NT) { 340 return true; 341 } else if (entry.isOnCPU){ 342 return true; 343 } 344 return false; 345 } 346 347 348 // ** OUT_PORTS ** 349 out_port(probeNetwork_out, NBProbeRequestMsg, probeToCore); 350 out_port(responseNetwork_out, ResponseMsg, responseToCore); 351 352 out_port(triggerQueue_out, TriggerMsg, triggerQueue); 353 out_port(L3TriggerQueue_out, TriggerMsg, L3triggerQueue); 354 355 // ** IN_PORTS ** 356 357 // Trigger Queue 358 in_port(triggerQueue_in, TriggerMsg, triggerQueue, 
rank=5) { 359 if (triggerQueue_in.isReady(clockEdge())) { 360 peek(triggerQueue_in, TriggerMsg) { 361 TBE tbe := TBEs.lookup(in_msg.addr); 362 CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); 363 if (in_msg.Type == TriggerType:AcksComplete) { 364 trigger(Event:ProbeAcksComplete, in_msg.addr, entry, tbe); 365 }else if (in_msg.Type == TriggerType:UnblockWriteThrough) { 366 trigger(Event:UnblockWriteThrough, in_msg.addr, entry, tbe); 367 } else { 368 error("Unknown trigger msg"); 369 } 370 } 371 } 372 } 373 374 in_port(L3TriggerQueue_in, TriggerMsg, L3triggerQueue, rank=4) { 375 if (L3TriggerQueue_in.isReady(clockEdge())) { 376 peek(L3TriggerQueue_in, TriggerMsg) { 377 TBE tbe := TBEs.lookup(in_msg.addr); 378 CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); 379 if (in_msg.Type == TriggerType:L3Hit) { 380 trigger(Event:L3Hit, in_msg.addr, entry, tbe); 381 } else { 382 error("Unknown trigger msg"); 383 } 384 } 385 } 386 } 387 388 // Unblock Network 389 in_port(unblockNetwork_in, UnblockMsg, unblockFromCores, rank=3) { 390 if (unblockNetwork_in.isReady(clockEdge())) { 391 peek(unblockNetwork_in, UnblockMsg) { 392 TBE tbe := TBEs.lookup(in_msg.addr); 393 CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); 394 trigger(Event:CoreUnblock, in_msg.addr, entry, tbe); 395 } 396 } 397 } 398 399 // Core response network 400 in_port(responseNetwork_in, ResponseMsg, responseFromCores, rank=2) { 401 if (responseNetwork_in.isReady(clockEdge())) { 402 peek(responseNetwork_in, ResponseMsg) { 403 TBE tbe := TBEs.lookup(in_msg.addr); 404 CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); 405 if (in_msg.Type == CoherenceResponseType:CPUPrbResp) { 406 trigger(Event:CPUPrbResp, in_msg.addr, entry, tbe); 407 } else if (in_msg.Type == CoherenceResponseType:CPUData) { 408 trigger(Event:CPUData, in_msg.addr, entry, tbe); 409 } else if (in_msg.Type == CoherenceResponseType:StaleNotif) { 410 trigger(Event:StaleWB, in_msg.addr, entry, tbe); 411 } else { 412 error("Unexpected response type"); 413 } 414 } 415 } 416 } 417 418 // off-chip memory request/response is done 419 in_port(memQueue_in, MemoryMsg, responseFromMemory, rank=1) { 420 if (memQueue_in.isReady(clockEdge())) { 421 peek(memQueue_in, MemoryMsg) { 422 TBE tbe := TBEs.lookup(in_msg.addr); 423 CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); 424 if (in_msg.Type == MemoryRequestType:MEMORY_READ) { 425 trigger(Event:MemData, in_msg.addr, entry, tbe); 426 DPRINTF(RubySlicc, "%s\n", in_msg); 427 } else if (in_msg.Type == MemoryRequestType:MEMORY_WB) { 428 trigger(Event:WBAck, in_msg.addr, entry, tbe); // ignore WBAcks, don't care about them. 
429 } else { 430 DPRINTF(RubySlicc, "%s\n", in_msg.Type); 431 error("Invalid message"); 432 } 433 } 434 } 435 } 436 437 in_port(requestNetwork_in, CPURequestMsg, requestFromCores, rank=0) { 438 if (requestNetwork_in.isReady(clockEdge())) { 439 peek(requestNetwork_in, CPURequestMsg) { 440 TBE tbe := TBEs.lookup(in_msg.addr); 441 CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(in_msg.addr)); 442 if (inclusiveDir && isNotPresentProbeFilter(in_msg.addr)) { 443 Addr victim := ProbeFilterMemory.cacheProbe(in_msg.addr); 444 tbe := TBEs.lookup(victim); 445 entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(victim)); 446 trigger(Event:PF_Repl, victim, entry, tbe); 447 } else if (in_msg.Type == CoherenceRequestType:RdBlk) { 448 trigger(Event:RdBlk, in_msg.addr, entry, tbe); 449 } else if (in_msg.Type == CoherenceRequestType:RdBlkS) { 450 trigger(Event:RdBlkS, in_msg.addr, entry, tbe); 451 } else if (in_msg.Type == CoherenceRequestType:RdBlkM) { 452 trigger(Event:RdBlkM, in_msg.addr, entry, tbe); 453 } else if (in_msg.Type == CoherenceRequestType:WriteThrough) { 454 trigger(Event:WriteThrough, in_msg.addr, entry, tbe); 455 } else if (in_msg.Type == CoherenceRequestType:Atomic) { 456 trigger(Event:Atomic, in_msg.addr, entry, tbe); 457 } else if (in_msg.Type == CoherenceRequestType:VicDirty) { 458 if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) { 459 DPRINTF(RubySlicc, "Dropping VicDirty for address %s\n", in_msg.addr); 460 trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe); 461 } else { 462 DPRINTF(RubySlicc, "Got VicDirty from %s on %s\n", in_msg.Requestor, in_msg.addr); 463 trigger(Event:VicDirty, in_msg.addr, entry, tbe); 464 } 465 } else if (in_msg.Type == CoherenceRequestType:VicClean) { 466 if (getDirectoryEntry(in_msg.addr).VicDirtyIgnore.isElement(in_msg.Requestor)) { 467 DPRINTF(RubySlicc, "Dropping VicClean for address %s\n", in_msg.addr); 468 trigger(Event:StaleVicDirty, in_msg.addr, entry, tbe); 469 } else { 470 DPRINTF(RubySlicc, "Got VicClean from %s on %s\n", in_msg.Requestor, in_msg.addr); 471 trigger(Event:VicClean, in_msg.addr, entry, tbe); 472 } 473 } else { 474 error("Bad request message type"); 475 } 476 } 477 } 478 } 479 480 // Actions 481 action(s_sendResponseS, "s", desc="send Shared response") { 482 enqueue(responseNetwork_out, ResponseMsg, response_latency) { 483 out_msg.addr := address; 484 out_msg.Type := CoherenceResponseType:NBSysResp; 485 if (tbe.L3Hit) { 486 out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0)); 487 } else { 488 out_msg.Sender := machineID; 489 } 490 out_msg.Destination.add(tbe.OriginalRequestor); 491 out_msg.DataBlk := tbe.DataBlk; 492 out_msg.MessageSize := MessageSizeType:Response_Data; 493 out_msg.Dirty := false; 494 out_msg.State := CoherenceState:Shared; 495 out_msg.InitialRequestTime := tbe.InitialRequestTime; 496 out_msg.ForwardRequestTime := tbe.ForwardRequestTime; 497 out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime; 498 out_msg.OriginalResponder := tbe.LastSender; 499 out_msg.L3Hit := tbe.L3Hit; 500 DPRINTF(RubySlicc, "%s\n", out_msg); 501 } 502 } 503 504 action(es_sendResponseES, "es", desc="send Exclusive or Shared response") { 505 enqueue(responseNetwork_out, ResponseMsg, response_latency) { 506 out_msg.addr := address; 507 out_msg.Type := CoherenceResponseType:NBSysResp; 508 if (tbe.L3Hit) { 509 out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0)); 510 } else { 511 out_msg.Sender := machineID; 512 } 513 
      out_msg.Destination.add(tbe.OriginalRequestor);
      out_msg.DataBlk := tbe.DataBlk;
      out_msg.MessageSize := MessageSizeType:Response_Data;
      out_msg.Dirty := tbe.Dirty;
      if (tbe.Cached) {
        out_msg.State := CoherenceState:Shared;
      } else {
        out_msg.State := CoherenceState:Exclusive;
      }
      out_msg.InitialRequestTime := tbe.InitialRequestTime;
      out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
      out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
      out_msg.OriginalResponder := tbe.LastSender;
      out_msg.L3Hit := tbe.L3Hit;
      DPRINTF(RubySlicc, "%s\n", out_msg);
    }
  }

  // Write-throughs and atomics do not send an unblock ack back to the
  // directory. Hence, the directory has to generate a self-unblocking
  // message. Additionally, write-throughs do not require data in their
  // response. Hence, write-through is treated separately from
  // write-back and atomics.
  action(m_sendResponseM, "m", desc="send Modified response") {
    if (tbe.wtData) {
      enqueue(triggerQueue_out, TriggerMsg, 1) {
        out_msg.addr := address;
        out_msg.Type := TriggerType:UnblockWriteThrough;
      }
    } else {
      enqueue(responseNetwork_out, ResponseMsg, response_latency) {
        out_msg.addr := address;
        out_msg.Type := CoherenceResponseType:NBSysResp;
        if (tbe.L3Hit) {
          out_msg.Sender := createMachineID(MachineType:L3Cache, intToID(0));
        } else {
          out_msg.Sender := machineID;
        }
        out_msg.Destination.add(tbe.OriginalRequestor);
        out_msg.DataBlk := tbe.DataBlk;
        out_msg.MessageSize := MessageSizeType:Response_Data;
        out_msg.Dirty := tbe.Dirty;
        out_msg.State := CoherenceState:Modified;
        out_msg.CtoD := false;
        out_msg.InitialRequestTime := tbe.InitialRequestTime;
        out_msg.ForwardRequestTime := tbe.ForwardRequestTime;
        out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
        out_msg.OriginalResponder := tbe.LastSender;
        if (tbe.atomicData) {
          out_msg.WTRequestor := tbe.WTRequestor;
        }
        out_msg.L3Hit := tbe.L3Hit;
        DPRINTF(RubySlicc, "%s\n", out_msg);
      }
      if (tbe.atomicData) {
        enqueue(triggerQueue_out, TriggerMsg, 1) {
          out_msg.addr := address;
          out_msg.Type := TriggerType:UnblockWriteThrough;
        }
      }
    }
  }

  action(c_sendResponseCtoD, "c", desc="send CtoD Ack") {
    enqueue(responseNetwork_out, ResponseMsg, response_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:NBSysResp;
      out_msg.Sender := machineID;
      out_msg.Destination.add(tbe.OriginalRequestor);
      out_msg.MessageSize := MessageSizeType:Response_Control;
      out_msg.Dirty := false;
      out_msg.State := CoherenceState:Modified;
      out_msg.CtoD := true;
      out_msg.InitialRequestTime := tbe.InitialRequestTime;
      out_msg.ForwardRequestTime := curCycle();
      out_msg.ProbeRequestStartTime := tbe.ProbeRequestStartTime;
      DPRINTF(RubySlicc, "%s\n", out_msg);
    }
  }

  action(w_sendResponseWBAck, "w", desc="send WB Ack") {
    peek(requestNetwork_in, CPURequestMsg) {
      enqueue(responseNetwork_out, ResponseMsg, 1) {
        out_msg.addr := address;
        out_msg.Type := CoherenceResponseType:NBSysWBAck;
        out_msg.Destination.add(in_msg.Requestor);
        out_msg.WTRequestor := in_msg.WTRequestor;
        out_msg.Sender := machineID;
        out_msg.MessageSize := MessageSizeType:Writeback_Control;
        out_msg.InitialRequestTime := in_msg.InitialRequestTime;
        out_msg.ForwardRequestTime := curCycle();
        out_msg.ProbeRequestStartTime := curCycle();
      }
    }
607 } 608 609 action(l_queueMemWBReq, "lq", desc="Write WB data to memory") { 610 peek(responseNetwork_in, ResponseMsg) { 611 queueMemoryWrite(machineID, address, to_memory_controller_latency, 612 in_msg.DataBlk); 613 } 614 } 615 616 action(l_queueMemRdReq, "lr", desc="Read data from memory") { 617 peek(requestNetwork_in, CPURequestMsg) { 618 if (L3CacheMemory.isTagPresent(address)) { 619 enqueue(L3TriggerQueue_out, TriggerMsg, l3_hit_latency) { 620 out_msg.addr := address; 621 out_msg.Type := TriggerType:L3Hit; 622 DPRINTF(RubySlicc, "%s\n", out_msg); 623 } 624 CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); 625 tbe.DataBlk := entry.DataBlk; 626 tbe.LastSender := entry.LastSender; 627 tbe.L3Hit := true; 628 tbe.MemData := true; 629 L3CacheMemory.deallocate(address); 630 } else { 631 queueMemoryRead(machineID, address, to_memory_controller_latency); 632 } 633 } 634 } 635 636 action(dc_probeInvCoreData, "dc", desc="probe inv cores, return data") { 637 peek(requestNetwork_in, CPURequestMsg) { 638 enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { 639 out_msg.addr := address; 640 out_msg.Type := ProbeRequestType:PrbInv; 641 out_msg.ReturnData := true; 642 out_msg.MessageSize := MessageSizeType:Control; 643 if(isCPUSharer(address)) { 644 out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket 645 } 646 647 // add relevant TCC node to list. This replaces all TCPs and SQCs 648 if(isGPUSharer(address)) { 649 if ((in_msg.Type == CoherenceRequestType:WriteThrough || 650 in_msg.Type == CoherenceRequestType:Atomic) && 651 in_msg.NoWriteConflict) { 652 // Don't Include TCCs unless there was write-CAB conflict in the TCC 653 } else if(noTCCdir) { 654 out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, 655 TCC_select_low_bit, TCC_select_num_bits)); 656 } else { 657 out_msg.Destination.add(mapAddressToMachine(address, MachineType:TCCdir)); 658 } 659 } 660 out_msg.Destination.remove(in_msg.Requestor); 661 tbe.NumPendingAcks := out_msg.Destination.count(); 662 if (tbe.NumPendingAcks == 0) { 663 enqueue(triggerQueue_out, TriggerMsg, 1) { 664 out_msg.addr := address; 665 out_msg.Type := TriggerType:AcksComplete; 666 } 667 } 668 DPRINTF(RubySlicc, "%s\n", out_msg); 669 APPEND_TRANSITION_COMMENT(" dc: Acks remaining: "); 670 APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); 671 tbe.ProbeRequestStartTime := curCycle(); 672 } 673 } 674 } 675 676 action(bp_backProbe, "bp", desc="back probe") { 677 enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { 678 out_msg.addr := address; 679 out_msg.Type := ProbeRequestType:PrbInv; 680 out_msg.ReturnData := true; 681 out_msg.MessageSize := MessageSizeType:Control; 682 if(isCPUSharer(address)) { 683 // won't be realistic for multisocket 684 out_msg.Destination.broadcast(MachineType:CorePair); 685 } 686 // add relevant TCC node to the list. 
This replaces all TCPs and SQCs 687 if(isGPUSharer(address)) { 688 if (noTCCdir) { 689 //Don't need to notify TCC about reads 690 } else { 691 out_msg.Destination.add(mapAddressToMachine(address, MachineType:TCCdir)); 692 tbe.NumPendingAcks := tbe.NumPendingAcks + 1; 693 } 694 if (noTCCdir && CAB_TCC) { 695 out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, 696 TCC_select_low_bit, TCC_select_num_bits)); 697 } 698 } 699 tbe.NumPendingAcks := out_msg.Destination.count(); 700 if (tbe.NumPendingAcks == 0) { 701 enqueue(triggerQueue_out, TriggerMsg, 1) { 702 out_msg.addr := address; 703 out_msg.Type := TriggerType:AcksComplete; 704 } 705 } 706 DPRINTF(RubySlicc, "%s\n", (out_msg)); 707 APPEND_TRANSITION_COMMENT(" sc: Acks remaining: "); 708 APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); 709 APPEND_TRANSITION_COMMENT(" - back probe"); 710 tbe.ProbeRequestStartTime := curCycle(); 711 } 712 } 713 714 action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") { 715 peek(requestNetwork_in, CPURequestMsg) { // not the right network? 716 enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { 717 out_msg.addr := address; 718 out_msg.Type := ProbeRequestType:PrbDowngrade; 719 out_msg.ReturnData := true; 720 out_msg.MessageSize := MessageSizeType:Control; 721 if(isCPUSharer(address)) { 722 out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket 723 } 724 // add relevant TCC node to the list. This replaces all TCPs and SQCs 725 if(isGPUSharer(address)) { 726 if (noTCCdir) { 727 //Don't need to notify TCC about reads 728 } else { 729 out_msg.Destination.add(mapAddressToMachine(address, MachineType:TCCdir)); 730 tbe.NumPendingAcks := tbe.NumPendingAcks + 1; 731 } 732 if (noTCCdir && CAB_TCC) { 733 out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, 734 TCC_select_low_bit, TCC_select_num_bits)); 735 } 736 } 737 out_msg.Destination.remove(in_msg.Requestor); 738 tbe.NumPendingAcks := out_msg.Destination.count(); 739 if (tbe.NumPendingAcks == 0) { 740 enqueue(triggerQueue_out, TriggerMsg, 1) { 741 out_msg.addr := address; 742 out_msg.Type := TriggerType:AcksComplete; 743 } 744 } 745 DPRINTF(RubySlicc, "%s\n", (out_msg)); 746 APPEND_TRANSITION_COMMENT(" sc: Acks remaining: "); 747 APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); 748 tbe.ProbeRequestStartTime := curCycle(); 749 } 750 } 751 } 752 753 action(ic_probeInvCore, "ic", desc="probe invalidate core, no return data needed") { 754 peek(requestNetwork_in, CPURequestMsg) { // not the right network? 755 enqueue(probeNetwork_out, NBProbeRequestMsg, response_latency) { 756 out_msg.addr := address; 757 out_msg.Type := ProbeRequestType:PrbInv; 758 out_msg.ReturnData := false; 759 out_msg.MessageSize := MessageSizeType:Control; 760 if(isCPUSharer(address)) { 761 out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket 762 } 763 764 // add relevant TCC node to the list. 
This replaces all TCPs and SQCs 765 if(isGPUSharer(address)) { 766 if (noTCCdir) { 767 out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, 768 TCC_select_low_bit, TCC_select_num_bits)); 769 } else { 770 out_msg.Destination.add(mapAddressToMachine(address, MachineType:TCCdir)); 771 } 772 } 773 out_msg.Destination.remove(in_msg.Requestor); 774 tbe.NumPendingAcks := out_msg.Destination.count(); 775 if (tbe.NumPendingAcks == 0) { 776 enqueue(triggerQueue_out, TriggerMsg, 1) { 777 out_msg.addr := address; 778 out_msg.Type := TriggerType:AcksComplete; 779 } 780 } 781 APPEND_TRANSITION_COMMENT(" ic: Acks remaining: "); 782 APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); 783 DPRINTF(RubySlicc, "%s\n", out_msg); 784 tbe.ProbeRequestStartTime := curCycle(); 785 } 786 } 787 } 788 789 action(sm_setMRU, "sm", desc="set probe filter entry as MRU") { 790 ProbeFilterMemory.setMRU(address); 791 } 792 793 action(d_writeDataToMemory, "d", desc="Write data to memory") { 794 peek(responseNetwork_in, ResponseMsg) { 795 getDirectoryEntry(address).DataBlk := in_msg.DataBlk; 796 DPRINTF(RubySlicc, "Writing Data: %s to address %s\n", in_msg.DataBlk, 797 in_msg.addr); 798 } 799 } 800 801 action(te_allocateTBEForEviction, "te", desc="allocate TBE Entry") { 802 check_allocate(TBEs); 803 TBEs.allocate(address); 804 set_tbe(TBEs.lookup(address)); 805 tbe.writeMask.clear(); 806 tbe.wtData := false; 807 tbe.atomicData := false; 808 tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs 809 tbe.Dirty := false; 810 tbe.NumPendingAcks := 0; 811 } 812 813 action(t_allocateTBE, "t", desc="allocate TBE Entry") { 814 check_allocate(TBEs); 815 peek(requestNetwork_in, CPURequestMsg) { 816 TBEs.allocate(address); 817 set_tbe(TBEs.lookup(address)); 818 if (in_msg.Type == CoherenceRequestType:WriteThrough) { 819 tbe.writeMask.clear(); 820 tbe.writeMask.orMask(in_msg.writeMask); 821 tbe.wtData := true; 822 tbe.WTRequestor := in_msg.WTRequestor; 823 tbe.LastSender := in_msg.Requestor; 824 } 825 if (in_msg.Type == CoherenceRequestType:Atomic) { 826 tbe.writeMask.clear(); 827 tbe.writeMask.orMask(in_msg.writeMask); 828 tbe.atomicData := true; 829 tbe.WTRequestor := in_msg.WTRequestor; 830 tbe.LastSender := in_msg.Requestor; 831 } 832 tbe.DataBlk := getDirectoryEntry(address).DataBlk; // Data only for WBs 833 tbe.Dirty := false; 834 if (in_msg.Type == CoherenceRequestType:WriteThrough) { 835 tbe.DataBlk.copyPartial(in_msg.DataBlk,tbe.writeMask); 836 tbe.Dirty := false; 837 } 838 tbe.OriginalRequestor := in_msg.Requestor; 839 tbe.NumPendingAcks := 0; 840 tbe.Cached := in_msg.ForceShared; 841 tbe.InitialRequestTime := in_msg.InitialRequestTime; 842 } 843 } 844 845 action(dt_deallocateTBE, "dt", desc="deallocate TBE Entry") { 846 if (tbe.Dirty == false) { 847 getDirectoryEntry(address).DataBlk := tbe.DataBlk; 848 } 849 TBEs.deallocate(address); 850 unset_tbe(); 851 } 852 853 action(wd_writeBackData, "wd", desc="Write back data if needed") { 854 if (tbe.wtData) { 855 DataBlock tmp := getDirectoryEntry(address).DataBlk; 856 tmp.copyPartial(tbe.DataBlk,tbe.writeMask); 857 tbe.DataBlk := tmp; 858 getDirectoryEntry(address).DataBlk := tbe.DataBlk; 859 } else if (tbe.atomicData) { 860 tbe.DataBlk.atomicPartial(getDirectoryEntry(address).DataBlk, 861 tbe.writeMask); 862 getDirectoryEntry(address).DataBlk := tbe.DataBlk; 863 } else if (tbe.Dirty == false) { 864 getDirectoryEntry(address).DataBlk := tbe.DataBlk; 865 } 866 } 867 868 action(mt_writeMemDataToTBE, "mt", desc="write Mem data to TBE") { 869 peek(memQueue_in, 
MemoryMsg) { 870 if (tbe.wtData == true) { 871 // DO Nothing (already have the directory data) 872 } else if (tbe.Dirty == false) { 873 tbe.DataBlk := getDirectoryEntry(address).DataBlk; 874 } 875 tbe.MemData := true; 876 } 877 } 878 879 action(y_writeProbeDataToTBE, "y", desc="write Probe Data to TBE") { 880 peek(responseNetwork_in, ResponseMsg) { 881 if (in_msg.Dirty) { 882 DPRINTF(RubySlicc, "Got dirty data for %s from %s\n", address, in_msg.Sender); 883 DPRINTF(RubySlicc, "Data is %s\n", in_msg.DataBlk); 884 if (tbe.wtData) { 885 DataBlock tmp := in_msg.DataBlk; 886 tmp.copyPartial(tbe.DataBlk,tbe.writeMask); 887 tbe.DataBlk := tmp; 888 } else if (tbe.Dirty) { 889 if(tbe.atomicData == false && tbe.wtData == false) { 890 DPRINTF(RubySlicc, "Got double data for %s from %s\n", address, in_msg.Sender); 891 assert(tbe.DataBlk == in_msg.DataBlk); // in case of double data 892 } 893 } else { 894 tbe.DataBlk := in_msg.DataBlk; 895 tbe.Dirty := in_msg.Dirty; 896 tbe.LastSender := in_msg.Sender; 897 } 898 } 899 if (in_msg.Hit) { 900 tbe.Cached := true; 901 } 902 } 903 } 904 905 action(mwc_markSinkWriteCancel, "mwc", desc="Mark to sink impending VicDirty") { 906 peek(responseNetwork_in, ResponseMsg) { 907 DPRINTF(RubySlicc, "Write cancel bit set on address %s\n", address); 908 getDirectoryEntry(address).VicDirtyIgnore.add(in_msg.Sender); 909 APPEND_TRANSITION_COMMENT(" setting bit to sink VicDirty "); 910 } 911 } 912 913 action(x_decrementAcks, "x", desc="decrement Acks pending") { 914 tbe.NumPendingAcks := tbe.NumPendingAcks - 1; 915 APPEND_TRANSITION_COMMENT(" Acks remaining: "); 916 APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); 917 } 918 919 action(o_checkForCompletion, "o", desc="check for ack completion") { 920 if (tbe.NumPendingAcks == 0) { 921 enqueue(triggerQueue_out, TriggerMsg, 1) { 922 out_msg.addr := address; 923 out_msg.Type := TriggerType:AcksComplete; 924 } 925 } 926 APPEND_TRANSITION_COMMENT(" Check: Acks remaining: "); 927 APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); 928 } 929 930 action(rv_removeVicDirtyIgnore, "rv", desc="Remove ignored core") { 931 peek(requestNetwork_in, CPURequestMsg) { 932 getDirectoryEntry(address).VicDirtyIgnore.remove(in_msg.Requestor); 933 } 934 } 935 936 action(al_allocateL3Block, "al", desc="allocate the L3 block on WB") { 937 peek(responseNetwork_in, ResponseMsg) { 938 if (L3CacheMemory.isTagPresent(address)) { 939 CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.lookup(address)); 940 APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) "); 941 entry.DataBlk := in_msg.DataBlk; 942 entry.LastSender := in_msg.Sender; 943 } else { 944 if (L3CacheMemory.cacheAvail(address) == false) { 945 Addr victim := L3CacheMemory.cacheProbe(address); 946 CacheEntry victim_entry := static_cast(CacheEntry, "pointer", 947 L3CacheMemory.lookup(victim)); 948 queueMemoryWrite(machineID, victim, to_memory_controller_latency, 949 victim_entry.DataBlk); 950 L3CacheMemory.deallocate(victim); 951 } 952 assert(L3CacheMemory.cacheAvail(address)); 953 CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry)); 954 APPEND_TRANSITION_COMMENT(" al wrote data to L3 "); 955 entry.DataBlk := in_msg.DataBlk; 956 957 entry.LastSender := in_msg.Sender; 958 } 959 } 960 } 961 962 action(alwt_allocateL3BlockOnWT, "alwt", desc="allocate the L3 block on WT") { 963 if ((tbe.wtData || tbe.atomicData) && useL3OnWT) { 964 if (L3CacheMemory.isTagPresent(address)) { 965 CacheEntry entry := static_cast(CacheEntry, "pointer", 
L3CacheMemory.lookup(address)); 966 APPEND_TRANSITION_COMMENT(" al wrote data to L3 (hit) "); 967 entry.DataBlk := tbe.DataBlk; 968 entry.LastSender := tbe.LastSender; 969 } else { 970 if (L3CacheMemory.cacheAvail(address) == false) { 971 Addr victim := L3CacheMemory.cacheProbe(address); 972 CacheEntry victim_entry := static_cast(CacheEntry, "pointer", 973 L3CacheMemory.lookup(victim)); 974 queueMemoryWrite(machineID, victim, to_memory_controller_latency, 975 victim_entry.DataBlk); 976 L3CacheMemory.deallocate(victim); 977 } 978 assert(L3CacheMemory.cacheAvail(address)); 979 CacheEntry entry := static_cast(CacheEntry, "pointer", L3CacheMemory.allocate(address, new CacheEntry)); 980 APPEND_TRANSITION_COMMENT(" al wrote data to L3 "); 981 entry.DataBlk := tbe.DataBlk; 982 entry.LastSender := tbe.LastSender; 983 } 984 } 985 } 986 987 action(apf_allocateProbeFilterEntry, "apf", desc="Allocate probe filte entry") { 988 if (!ProbeFilterMemory.isTagPresent(address)) { 989 if (inclusiveDir) { 990 assert(ProbeFilterMemory.cacheAvail(address)); 991 } else if (ProbeFilterMemory.cacheAvail(address) == false) { 992 Addr victim := ProbeFilterMemory.cacheProbe(address); 993 ProbeFilterMemory.deallocate(victim); 994 } 995 assert(ProbeFilterMemory.cacheAvail(address)); 996 CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.allocate(address, new CacheEntry)); 997 APPEND_TRANSITION_COMMENT(" allocating a new probe filter entry"); 998 entry.pfState := ProbeFilterState:NT; 999 if (inclusiveDir) { 1000 entry.pfState := ProbeFilterState:T; 1001 } 1002 entry.isOnCPU := false; 1003 entry.isOnGPU := false; 1004 } 1005 } 1006 1007 action(mpfe_markPFEntryForEviction, "mpfe", desc="Mark this PF entry is being evicted") { 1008 assert(ProbeFilterMemory.isTagPresent(address)); 1009 CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address)); 1010 entry.pfState := ProbeFilterState:B; 1011 peek(requestNetwork_in, CPURequestMsg) { 1012 tbe.demandAddress := in_msg.addr; 1013 } 1014 } 1015 1016 action(we_wakeUpEvictionDependents, "we", desc="Wake up requests waiting for demand address and victim address") { 1017 wakeUpBuffers(address); 1018 wakeUpBuffers(tbe.demandAddress); 1019 } 1020 1021 action(dpf_deallocateProbeFilter, "dpf", desc="deallocate PF entry") { 1022 assert(ProbeFilterMemory.isTagPresent(address)); 1023 ProbeFilterMemory.deallocate(address); 1024 } 1025 1026 action(upf_updateProbeFilter, "upf", desc="") { 1027 peek(requestNetwork_in, CPURequestMsg) { 1028 assert(ProbeFilterMemory.isTagPresent(address)); 1029 CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address)); 1030 if (in_msg.Type == CoherenceRequestType:WriteThrough) { 1031 entry.pfState := ProbeFilterState:T; 1032 entry.isOnCPU := false; 1033 entry.isOnGPU := false; 1034 } else if (in_msg.Type == CoherenceRequestType:Atomic) { 1035 entry.pfState := ProbeFilterState:T; 1036 entry.isOnCPU := false; 1037 entry.isOnGPU := false; 1038 } else if (in_msg.Type == CoherenceRequestType:RdBlkM) { 1039 entry.pfState := ProbeFilterState:T; 1040 entry.isOnCPU := false; 1041 entry.isOnGPU := false; 1042 } else if (in_msg.Type == CoherenceRequestType:CtoD) { 1043 entry.pfState := ProbeFilterState:T; 1044 entry.isOnCPU := false; 1045 entry.isOnGPU := false; 1046 } 1047 if(machineIDToMachineType(in_msg.Requestor) == MachineType:CorePair) { 1048 entry.isOnCPU := true; 1049 } else { 1050 entry.isOnGPU := true; 1051 } 1052 } 1053 } 1054 1055 action(rmcd_removeSharerConditional, "rmcd", 
desc="remove sharer from probe Filter, conditional") { 1056 peek(requestNetwork_in, CPURequestMsg) { 1057 if (ProbeFilterMemory.isTagPresent(address)) { 1058 CacheEntry entry := static_cast(CacheEntry, "pointer", ProbeFilterMemory.lookup(address)); 1059 if(machineIDToMachineType(in_msg.Requestor) == MachineType:CorePair) {//CorePair has inclusive L2 1060 if (in_msg.Type == CoherenceRequestType:VicDirty) { 1061 entry.isOnCPU := false; 1062 } else if (in_msg.Type == CoherenceRequestType:VicClean) { 1063 entry.isOnCPU := false; 1064 } 1065 } 1066 } 1067 } 1068 } 1069 1070 action(sf_setForwardReqTime, "sf", desc="...") { 1071 tbe.ForwardRequestTime := curCycle(); 1072 } 1073 1074 action(dl_deallocateL3, "dl", desc="deallocate the L3 block") { 1075 L3CacheMemory.deallocate(address); 1076 } 1077 1078 action(p_popRequestQueue, "p", desc="pop request queue") { 1079 requestNetwork_in.dequeue(clockEdge()); 1080 } 1081 1082 action(pr_popResponseQueue, "pr", desc="pop response queue") { 1083 responseNetwork_in.dequeue(clockEdge()); 1084 } 1085 1086 action(pm_popMemQueue, "pm", desc="pop mem queue") { 1087 memQueue_in.dequeue(clockEdge()); 1088 } 1089 1090 action(pt_popTriggerQueue, "pt", desc="pop trigger queue") { 1091 triggerQueue_in.dequeue(clockEdge()); 1092 } 1093 1094 action(ptl_popTriggerQueue, "ptl", desc="pop L3 trigger queue") { 1095 L3TriggerQueue_in.dequeue(clockEdge()); 1096 } 1097 1098 action(pu_popUnblockQueue, "pu", desc="pop unblock queue") { 1099 unblockNetwork_in.dequeue(clockEdge()); 1100 } 1101 1102 action(zz_recycleRequestQueue, "zz", desc="recycle request queue") { 1103 requestNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); 1104 } 1105 1106 action(yy_recycleResponseQueue, "yy", desc="recycle response queue") { 1107 responseNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency)); 1108 } 1109 1110 action(st_stallAndWaitRequest, "st", desc="Stall and wait on the address") { 1111 stall_and_wait(requestNetwork_in, address); 1112 } 1113 1114 action(wa_wakeUpDependents, "wa", desc="Wake up any requests waiting for this address") { 1115 wakeUpBuffers(address); 1116 } 1117 1118 action(wa_wakeUpAllDependents, "waa", desc="Wake up any requests waiting for this region") { 1119 wakeUpAllBuffers(); 1120 } 1121 1122 action(z_stall, "z", desc="...") { 1123 } 1124 1125 // TRANSITIONS 1126 transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, {RdBlkS, RdBlkM, RdBlk, CtoD}) { 1127 st_stallAndWaitRequest; 1128 } 1129 1130 // It may be possible to save multiple invalidations here! 
1131 transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, {Atomic, WriteThrough}) { 1132 st_stallAndWaitRequest; 1133 } 1134 1135 1136 // transitions from U 1137 transition(U, PF_Repl, B_P) {PFTagArrayRead, PFTagArrayWrite}{ 1138 te_allocateTBEForEviction; 1139 apf_allocateProbeFilterEntry; 1140 bp_backProbe; 1141 sm_setMRU; 1142 mpfe_markPFEntryForEviction; 1143 } 1144 1145 transition(U, {RdBlkS}, BS_PM) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite} { 1146 t_allocateTBE; 1147 apf_allocateProbeFilterEntry; 1148 l_queueMemRdReq; 1149 sc_probeShrCoreData; 1150 sm_setMRU; 1151 upf_updateProbeFilter; 1152 p_popRequestQueue; 1153 } 1154 1155 transition(U, WriteThrough, BM_PM) {L3TagArrayRead, L3TagArrayWrite, PFTagArrayRead, PFTagArrayWrite} { 1156 t_allocateTBE; 1157 apf_allocateProbeFilterEntry; 1158 w_sendResponseWBAck; 1159 l_queueMemRdReq; 1160 dc_probeInvCoreData; 1161 sm_setMRU; 1162 upf_updateProbeFilter; 1163 p_popRequestQueue; 1164 } 1165 1166 transition(U, Atomic, BM_PM) {L3TagArrayRead, L3TagArrayWrite, PFTagArrayRead, PFTagArrayWrite} { 1167 t_allocateTBE; 1168 apf_allocateProbeFilterEntry; 1169 l_queueMemRdReq; 1170 dc_probeInvCoreData; 1171 sm_setMRU; 1172 upf_updateProbeFilter; 1173 p_popRequestQueue; 1174 } 1175 1176 transition(U, {RdBlkM}, BM_PM) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite} { 1177 t_allocateTBE; 1178 apf_allocateProbeFilterEntry; 1179 l_queueMemRdReq; 1180 dc_probeInvCoreData; 1181 sm_setMRU; 1182 upf_updateProbeFilter; 1183 p_popRequestQueue; 1184 } 1185 1186 transition(U, RdBlk, B_PM) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite}{ 1187 t_allocateTBE; 1188 apf_allocateProbeFilterEntry; 1189 l_queueMemRdReq; 1190 sc_probeShrCoreData; 1191 sm_setMRU; 1192 upf_updateProbeFilter; 1193 p_popRequestQueue; 1194 } 1195 1196 transition(U, CtoD, BP) {L3TagArrayRead, PFTagArrayRead, PFTagArrayWrite} { 1197 t_allocateTBE; 1198 apf_allocateProbeFilterEntry; 1199 ic_probeInvCore; 1200 sm_setMRU; 1201 upf_updateProbeFilter; 1202 p_popRequestQueue; 1203 } 1204 1205 transition(U, VicDirty, BL) {L3TagArrayRead} { 1206 t_allocateTBE; 1207 w_sendResponseWBAck; 1208 rmcd_removeSharerConditional; 1209 p_popRequestQueue; 1210 } 1211 1212 transition(U, VicClean, BL) {L3TagArrayRead} { 1213 t_allocateTBE; 1214 w_sendResponseWBAck; 1215 rmcd_removeSharerConditional; 1216 p_popRequestQueue; 1217 } 1218 1219 transition(BL, {VicDirty, VicClean}) { 1220 zz_recycleRequestQueue; 1221 } 1222 1223 transition(BL, CPUData, U) {L3TagArrayWrite, L3DataArrayWrite} { 1224 d_writeDataToMemory; 1225 al_allocateL3Block; 1226 wa_wakeUpDependents; 1227 dt_deallocateTBE; 1228 //l_queueMemWBReq; // why need an ack? esp. 
with DRAMSim, just put it in queue no ack needed 1229 pr_popResponseQueue; 1230 } 1231 1232 transition(BL, StaleWB, U) {L3TagArrayWrite} { 1233 dt_deallocateTBE; 1234 wa_wakeUpAllDependents; 1235 pr_popResponseQueue; 1236 } 1237 1238 transition({B, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P}, {VicDirty, VicClean}) { 1239 z_stall; 1240 } 1241 1242 transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, WBAck) { 1243 pm_popMemQueue; 1244 } 1245 1246 transition({BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, PF_Repl) { 1247 zz_recycleRequestQueue; 1248 } 1249 1250 transition({U, BL, BS_M, BM_M, B_M, BP, BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, B}, StaleVicDirty) { 1251 rv_removeVicDirtyIgnore; 1252 w_sendResponseWBAck; 1253 p_popRequestQueue; 1254 } 1255 1256 transition({B}, CoreUnblock, U) { 1257 wa_wakeUpDependents; 1258 pu_popUnblockQueue; 1259 } 1260 1261 transition(B, UnblockWriteThrough, U) { 1262 wa_wakeUpDependents; 1263 pt_popTriggerQueue; 1264 } 1265 1266 transition(BS_PM, MemData, BS_Pm) {} { 1267 mt_writeMemDataToTBE; 1268 pm_popMemQueue; 1269 } 1270 1271 transition(BM_PM, MemData, BM_Pm){} { 1272 mt_writeMemDataToTBE; 1273 pm_popMemQueue; 1274 } 1275 1276 transition(B_PM, MemData, B_Pm){} { 1277 mt_writeMemDataToTBE; 1278 pm_popMemQueue; 1279 } 1280 1281 transition(BS_PM, L3Hit, BS_Pm) {} { 1282 ptl_popTriggerQueue; 1283 } 1284 1285 transition(BM_PM, L3Hit, BM_Pm) {} { 1286 ptl_popTriggerQueue; 1287 } 1288 1289 transition(B_PM, L3Hit, B_Pm) {} { 1290 ptl_popTriggerQueue; 1291 } 1292 1293 transition(BS_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} { 1294 mt_writeMemDataToTBE; 1295 s_sendResponseS; 1296 wd_writeBackData; 1297 alwt_allocateL3BlockOnWT; 1298 dt_deallocateTBE; 1299 pm_popMemQueue; 1300 } 1301 1302 transition(BM_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} { 1303 mt_writeMemDataToTBE; 1304 m_sendResponseM; 1305 wd_writeBackData; 1306 alwt_allocateL3BlockOnWT; 1307 dt_deallocateTBE; 1308 pm_popMemQueue; 1309 } 1310 1311 transition(B_M, MemData, B){L3TagArrayWrite, L3DataArrayWrite} { 1312 mt_writeMemDataToTBE; 1313 es_sendResponseES; 1314 wd_writeBackData; 1315 alwt_allocateL3BlockOnWT; 1316 dt_deallocateTBE; 1317 pm_popMemQueue; 1318 } 1319 1320 transition(BS_M, L3Hit, B) {L3TagArrayWrite, L3DataArrayWrite} { 1321 s_sendResponseS; 1322 wd_writeBackData; 1323 alwt_allocateL3BlockOnWT; 1324 dt_deallocateTBE; 1325 ptl_popTriggerQueue; 1326 } 1327 1328 transition(BM_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} { 1329 m_sendResponseM; 1330 wd_writeBackData; 1331 alwt_allocateL3BlockOnWT; 1332 dt_deallocateTBE; 1333 ptl_popTriggerQueue; 1334 } 1335 1336 transition(B_M, L3Hit, B) {L3DataArrayWrite, L3TagArrayWrite} { 1337 es_sendResponseES; 1338 wd_writeBackData; 1339 alwt_allocateL3BlockOnWT; 1340 dt_deallocateTBE; 1341 ptl_popTriggerQueue; 1342 } 1343 1344 transition({BS_PM, BM_PM, B_PM, BS_Pm, BM_Pm, B_Pm, B_P, BP}, CPUPrbResp) { 1345 y_writeProbeDataToTBE; 1346 x_decrementAcks; 1347 o_checkForCompletion; 1348 pr_popResponseQueue; 1349 } 1350 1351 transition(BS_PM, ProbeAcksComplete, BS_M) {} { 1352 sf_setForwardReqTime; 1353 pt_popTriggerQueue; 1354 } 1355 1356 transition(BM_PM, ProbeAcksComplete, BM_M) {} { 1357 sf_setForwardReqTime; 1358 pt_popTriggerQueue; 1359 } 1360 1361 transition(B_PM, ProbeAcksComplete, B_M){} { 1362 sf_setForwardReqTime; 1363 pt_popTriggerQueue; 1364 } 1365 1366 transition(BS_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} { 1367 
sf_setForwardReqTime; 1368 s_sendResponseS; 1369 wd_writeBackData; 1370 alwt_allocateL3BlockOnWT; 1371 dt_deallocateTBE; 1372 pt_popTriggerQueue; 1373 } 1374 1375 transition(BM_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} { 1376 sf_setForwardReqTime; 1377 m_sendResponseM; 1378 wd_writeBackData; 1379 alwt_allocateL3BlockOnWT; 1380 dt_deallocateTBE; 1381 pt_popTriggerQueue; 1382 } 1383 1384 transition(B_Pm, ProbeAcksComplete, B){L3DataArrayWrite, L3TagArrayWrite} { 1385 sf_setForwardReqTime; 1386 es_sendResponseES; 1387 wd_writeBackData; 1388 alwt_allocateL3BlockOnWT; 1389 dt_deallocateTBE; 1390 pt_popTriggerQueue; 1391 } 1392 1393 transition(B_P, ProbeAcksComplete, U) { 1394 wd_writeBackData; 1395 alwt_allocateL3BlockOnWT; 1396 we_wakeUpEvictionDependents; 1397 dpf_deallocateProbeFilter; 1398 dt_deallocateTBE; 1399 pt_popTriggerQueue; 1400 } 1401 1402 transition(BP, ProbeAcksComplete, B){L3TagArrayWrite, L3TagArrayWrite} { 1403 sf_setForwardReqTime; 1404 c_sendResponseCtoD; 1405 wd_writeBackData; 1406 alwt_allocateL3BlockOnWT; 1407 dt_deallocateTBE; 1408 pt_popTriggerQueue; 1409 } 1410} 1411