/*
 * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Lisa Hsu
 */

machine(MachineType:TCP, "GPU TCP (L1 Data Cache)")
 : GPUCoalescer* coalescer;
   Sequencer* sequencer;
   bool use_seq_not_coal;
   CacheMemory * L1cache;
   int TCC_select_num_bits;
   Cycles issue_latency := 40;  // time to send data down to TCC
   Cycles l2_hit_latency := 18;

   MessageBuffer * requestFromTCP, network="To", virtual_network="1", vnet_type="request";
   MessageBuffer * responseFromTCP, network="To", virtual_network="3", vnet_type="response";
   MessageBuffer * unblockFromCore, network="To", virtual_network="5", vnet_type="unblock";

   MessageBuffer * probeToTCP, network="From", virtual_network="1", vnet_type="request";
   MessageBuffer * responseToTCP, network="From", virtual_network="3", vnet_type="response";

   MessageBuffer * mandatoryQueue;
{
  state_declaration(State, desc="TCP Cache States", default="TCP_State_I") {
    I, AccessPermission:Invalid, desc="Invalid";
    S, AccessPermission:Read_Only, desc="Shared";
    E, AccessPermission:Read_Write, desc="Exclusive";
    O, AccessPermission:Read_Only, desc="Owner state in core, both clusters and other cores may be sharing line";
    M, AccessPermission:Read_Write, desc="Modified";

    I_M, AccessPermission:Busy, desc="Invalid, issued RdBlkM, have not seen response yet";
    I_ES, AccessPermission:Busy, desc="Invalid, issued RdBlk, have not seen response yet";
    S_M, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";
    O_M, AccessPermission:Read_Only, desc="Shared, issued CtoD, have not seen response yet";

    ES_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for clean WB ack";
    MO_I, AccessPermission:Read_Only, desc="L1 replacement, waiting for dirty WB ack";

    MO_PI, AccessPermission:Read_Only, desc="L1 downgrade, waiting for CtoD ack (or ProbeInvalidateData)";

    I_C, AccessPermission:Invalid, desc="Invalid, waiting for WBAck from TCC for canceled WB";
  }

  enumeration(Event, desc="TCP Events") {
    // Core initiated
    Load,        desc="Load";
    Store,       desc="Store";

    // TCC initiated
    TCC_AckS,    desc="TCC Ack to Core Request";
    TCC_AckE,    desc="TCC Ack to Core Request";
    TCC_AckM,    desc="TCC Ack to Core Request";
    TCC_AckCtoD, desc="TCC Ack to Core Request";
    TCC_AckWB,   desc="TCC Ack for clean WB";
    TCC_NackWB,  desc="TCC Nack for clean WB";

    // Mem sys initiated
    Repl,        desc="Replacing block from cache";

    // Probe Events
    PrbInvData,  desc="probe, return O or M data";
    PrbInv,      desc="probe, no need for data";
    LocalPrbInv, desc="local probe, no need for data";
    PrbShrData,  desc="probe downgrade, return O or M data";
  }

  enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
    DataArrayRead,  desc="Read the data array";
    DataArrayWrite, desc="Write the data array";
    TagArrayRead,   desc="Read the tag array";
    TagArrayWrite,  desc="Write the tag array";
  }


  structure(Entry, desc="...", interface="AbstractCacheEntry") {
    State CacheState,             desc="cache state";
    bool Dirty,                   desc="Is the data dirty (diff than memory)?";
    DataBlock DataBlk,            desc="data for the block";
    bool FromL2, default="false", desc="block just moved from L2";
  }

  structure(TBE, desc="...") {
    State TBEState,     desc="Transient state";
    DataBlock DataBlk,  desc="data for the block, required for concurrent writebacks";
    bool Dirty,         desc="Is the data dirty (different than memory)?";
    int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for";
    bool Shared,        desc="Victim hit by shared probe";
  }

  structure(TBETable, external="yes") {
    TBE lookup(Addr);
    void allocate(Addr);
    void deallocate(Addr);
    bool isPresent(Addr);
  }

  TBETable TBEs, template="<TCP_TBE>", constructor="m_number_of_TBEs";
  int TCC_select_low_bit, default="RubySystem::getBlockSizeBits()";

  Tick clockEdge();
  Tick cyclesToTicks(Cycles c);

  void set_cache_entry(AbstractCacheEntry b);
  void unset_cache_entry();
  void set_tbe(TBE b);
  void unset_tbe();
  void wakeUpAllBuffers();
  void wakeUpBuffers(Addr a);
  Cycles curCycle();

  // Internal functions
  Entry getCacheEntry(Addr address), return_by_pointer="yes" {
    Entry cache_entry := static_cast(Entry, "pointer", L1cache.lookup(address));
    return cache_entry;
  }

  DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
    TBE tbe := TBEs.lookup(addr);
    if (is_valid(tbe)) {
      return tbe.DataBlk;
    } else {
      return getCacheEntry(addr).DataBlk;
    }
  }

  State getState(TBE tbe, Entry cache_entry, Addr addr) {
    if (is_valid(tbe)) {
      return tbe.TBEState;
    } else if (is_valid(cache_entry)) {
      return cache_entry.CacheState;
    }
    return State:I;
  }

  void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
    if (is_valid(tbe)) {
      tbe.TBEState := state;
    }

    if (is_valid(cache_entry)) {
      cache_entry.CacheState := state;
    }
  }

  AccessPermission getAccessPermission(Addr addr) {
    TBE tbe := TBEs.lookup(addr);
    if (is_valid(tbe)) {
      return TCP_State_to_permission(tbe.TBEState);
    }

    Entry cache_entry := getCacheEntry(addr);
    if (is_valid(cache_entry)) {
      return TCP_State_to_permission(cache_entry.CacheState);
    }

    return AccessPermission:NotPresent;
  }

  bool isValid(Addr addr) {
    AccessPermission perm := getAccessPermission(addr);
    if (perm == AccessPermission:NotPresent ||
        perm == AccessPermission:Invalid ||
        perm == AccessPermission:Busy) {
      return false;
    } else {
      return true;
    }
  }

  void setAccessPermission(Entry cache_entry, Addr addr, State state) {
    if (is_valid(cache_entry)) {
      cache_entry.changePermission(TCP_State_to_permission(state));
    }
  }

  void functionalRead(Addr addr, Packet *pkt) {
    TBE tbe := TBEs.lookup(addr);
    if (is_valid(tbe)) {
      testAndRead(addr, tbe.DataBlk, pkt);
    } else {
      functionalMemoryRead(pkt);
    }
  }

  int functionalWrite(Addr addr, Packet *pkt) {
    int num_functional_writes := 0;

    TBE tbe := TBEs.lookup(addr);
    if (is_valid(tbe)) {
      num_functional_writes := num_functional_writes +
            testAndWrite(addr, tbe.DataBlk, pkt);
    }

    num_functional_writes := num_functional_writes + functionalMemoryWrite(pkt);
    return num_functional_writes;
  }

  void recordRequestType(RequestType request_type, Addr addr) {
    if (request_type == RequestType:DataArrayRead) {
      L1cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
    } else if (request_type == RequestType:DataArrayWrite) {
      L1cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
    } else if (request_type == RequestType:TagArrayRead) {
      L1cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
    } else if (request_type == RequestType:TagArrayWrite) {
      L1cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
    }
  }

  bool checkResourceAvailable(RequestType request_type, Addr addr) {
    if (request_type == RequestType:DataArrayRead) {
      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
    } else if (request_type == RequestType:DataArrayWrite) {
      return L1cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
    } else if (request_type == RequestType:TagArrayRead) {
      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
    } else if (request_type == RequestType:TagArrayWrite) {
      return L1cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
    } else {
      error("Invalid RequestType type in checkResourceAvailable");
      return true;
    }
  }

  MachineType getCoherenceType(MachineID myMachID,
                               MachineID senderMachID) {
    if (myMachID == senderMachID) {
      return MachineType:TCP;
    } else if (machineIDToMachineType(senderMachID) == MachineType:TCP) {
      return MachineType:L1Cache_wCC;
    } else if (machineIDToMachineType(senderMachID) == MachineType:TCC) {
      return MachineType:TCC;
    } else {
      return MachineType:TCCdir;
    }
  }

  // Out Ports

  out_port(requestNetwork_out, CPURequestMsg, requestFromTCP);
  out_port(responseNetwork_out, ResponseMsg, responseFromTCP);
  out_port(unblockNetwork_out, UnblockMsg, unblockFromCore);

  // In Ports

  in_port(probeNetwork_in, TDProbeRequestMsg, probeToTCP) {
    if (probeNetwork_in.isReady(clockEdge())) {
      peek(probeNetwork_in, TDProbeRequestMsg, block_on="addr") {
        DPRINTF(RubySlicc, "%s\n", in_msg);
        DPRINTF(RubySlicc, "machineID: %s\n", machineID);
        Entry cache_entry := getCacheEntry(in_msg.addr);
        TBE tbe := TBEs.lookup(in_msg.addr);

        if (in_msg.Type == ProbeRequestType:PrbInv) {
          if (in_msg.ReturnData) {
            trigger(Event:PrbInvData, in_msg.addr, cache_entry, tbe);
          } else {
            if (in_msg.localCtoD) {
              trigger(Event:LocalPrbInv, in_msg.addr, cache_entry, tbe);
            } else {
              trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
            }
          }
        } else if (in_msg.Type == ProbeRequestType:PrbDowngrade) {
          assert(in_msg.ReturnData);
          trigger(Event:PrbShrData, in_msg.addr, cache_entry, tbe);
        }
      }
    }
  }

  in_port(responseToTCP_in, ResponseMsg, responseToTCP) {
    if (responseToTCP_in.isReady(clockEdge())) {
      peek(responseToTCP_in, ResponseMsg, block_on="addr") {

        Entry cache_entry := getCacheEntry(in_msg.addr);
        TBE tbe := TBEs.lookup(in_msg.addr);

        if (in_msg.Type == CoherenceResponseType:TDSysResp) {
          if (in_msg.State == CoherenceState:Modified) {
            if (in_msg.CtoD) {
              trigger(Event:TCC_AckCtoD, in_msg.addr, cache_entry, tbe);
            } else {
              trigger(Event:TCC_AckM, in_msg.addr, cache_entry, tbe);
            }
          } else if (in_msg.State == CoherenceState:Shared) {
            trigger(Event:TCC_AckS, in_msg.addr, cache_entry, tbe);
          } else if (in_msg.State == CoherenceState:Exclusive) {
            trigger(Event:TCC_AckE, in_msg.addr, cache_entry, tbe);
          }
        } else if (in_msg.Type == CoherenceResponseType:TDSysWBAck) {
          trigger(Event:TCC_AckWB, in_msg.addr, cache_entry, tbe);
        } else if (in_msg.Type == CoherenceResponseType:TDSysWBNack) {
          trigger(Event:TCC_NackWB, in_msg.addr, cache_entry, tbe);
        } else {
          error("Unexpected Response Message to Core");
        }
      }
    }
  }

  in_port(mandatoryQueue_in, RubyRequest, mandatoryQueue, desc="...") {
    if (mandatoryQueue_in.isReady(clockEdge())) {
      peek(mandatoryQueue_in, RubyRequest, block_on="LineAddress") {
        Entry cache_entry := getCacheEntry(in_msg.LineAddress);
        TBE tbe := TBEs.lookup(in_msg.LineAddress);
        DPRINTF(RubySlicc, "%s\n", in_msg);
        if (in_msg.Type == RubyRequestType:LD) {
          if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
            trigger(Event:Load, in_msg.LineAddress, cache_entry, tbe);
          } else {
            Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
            trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
          }
        } else {
          if (is_valid(cache_entry) || L1cache.cacheAvail(in_msg.LineAddress)) {
            trigger(Event:Store, in_msg.LineAddress, cache_entry, tbe);
          } else {
            Addr victim := L1cache.cacheProbe(in_msg.LineAddress);
            trigger(Event:Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
          }
        }
      }
    }
  }

  // Actions

  action(ic_invCache, "ic", desc="invalidate cache") {
    if (is_valid(cache_entry)) {
      L1cache.deallocate(address);
    }
    unset_cache_entry();
  }

  action(n_issueRdBlk, "n", desc="Issue RdBlk") {
    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceRequestType:RdBlk;
      out_msg.Requestor := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.MessageSize := MessageSizeType:Request_Control;
      out_msg.InitialRequestTime := curCycle();
    }
  }

  action(nM_issueRdBlkM, "nM", desc="Issue RdBlkM") {
    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceRequestType:RdBlkM;
      out_msg.Requestor := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.MessageSize := MessageSizeType:Request_Control;
      out_msg.InitialRequestTime := curCycle();
    }
  }

  action(vd_victim, "vd", desc="Victimize M/O Data") {
    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Requestor := machineID;
      assert(is_valid(cache_entry));
      out_msg.DataBlk := cache_entry.DataBlk;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.MessageSize := MessageSizeType:Request_Control;
      out_msg.Type := CoherenceRequestType:VicDirty;
      out_msg.InitialRequestTime := curCycle();
      if (cache_entry.CacheState == State:O) {
        out_msg.Shared := true;
      } else {
        out_msg.Shared := false;
      }
      out_msg.Dirty := cache_entry.Dirty;
    }
  }

  action(vc_victim, "vc", desc="Victimize E/S Data") {
    enqueue(requestNetwork_out, CPURequestMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Requestor := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.MessageSize := MessageSizeType:Request_Control;
      out_msg.Type := CoherenceRequestType:VicClean;
      out_msg.InitialRequestTime := curCycle();
      if (cache_entry.CacheState == State:S) {
        out_msg.Shared := true;
      } else {
        out_msg.Shared := false;
      }
    }
  }

  action(a_allocate, "a", desc="allocate block") {
    if (is_invalid(cache_entry)) {
      set_cache_entry(L1cache.allocate(address, new Entry));
    }
  }

  action(t_allocateTBE, "t", desc="allocate TBE Entry") {
    check_allocate(TBEs);
    assert(is_valid(cache_entry));
    TBEs.allocate(address);
    set_tbe(TBEs.lookup(address));
    tbe.DataBlk := cache_entry.DataBlk;  // Data only used for WBs
    tbe.Dirty := cache_entry.Dirty;
    tbe.Shared := false;
  }

  action(d_deallocateTBE, "d", desc="Deallocate TBE") {
    TBEs.deallocate(address);
    unset_tbe();
  }

  action(p_popMandatoryQueue, "pm", desc="Pop Mandatory Queue") {
    mandatoryQueue_in.dequeue(clockEdge());
  }

  action(pr_popResponseQueue, "pr", desc="Pop Response Queue") {
    responseToTCP_in.dequeue(clockEdge());
  }

  action(pp_popProbeQueue, "pp", desc="pop probe queue") {
    probeNetwork_in.dequeue(clockEdge());
  }

  action(l_loadDone, "l", desc="local load done") {
    assert(is_valid(cache_entry));
    if (use_seq_not_coal) {
      sequencer.readCallback(address, cache_entry.DataBlk,
                             false, MachineType:TCP);
    } else {
      coalescer.readCallback(address, MachineType:TCP, cache_entry.DataBlk);
    }
  }

  action(xl_loadDone, "xl", desc="remote load done") {
    peek(responseToTCP_in, ResponseMsg) {
      assert(is_valid(cache_entry));
      if (use_seq_not_coal) {
        coalescer.recordCPReadCallBack(machineID, in_msg.Sender);
        sequencer.readCallback(address,
                               cache_entry.DataBlk,
                               false,
                               machineIDToMachineType(in_msg.Sender),
                               in_msg.InitialRequestTime,
                               in_msg.ForwardRequestTime,
                               in_msg.ProbeRequestStartTime);
      } else {
        MachineType cc_mach_type := getCoherenceType(machineID,
                                                     in_msg.Sender);
        coalescer.readCallback(address,
                               cc_mach_type,
                               cache_entry.DataBlk,
                               in_msg.InitialRequestTime,
                               in_msg.ForwardRequestTime,
                               in_msg.ProbeRequestStartTime);
      }
    }
  }
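
  // Store completion actions below mirror the load completions above: the
  // callback is routed to the Sequencer when use_seq_not_coal is set,
  // otherwise to the GPUCoalescer, and the line is marked dirty.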

  action(s_storeDone, "s", desc="local store done") {
    assert(is_valid(cache_entry));
    if (use_seq_not_coal) {
      coalescer.recordCPWriteCallBack(machineID, machineID);
      sequencer.writeCallback(address, cache_entry.DataBlk,
                              false, MachineType:TCP);
    } else {
      coalescer.writeCallback(address, MachineType:TCP, cache_entry.DataBlk);
    }
    cache_entry.Dirty := true;
  }

  action(xs_storeDone, "xs", desc="remote store done") {
    peek(responseToTCP_in, ResponseMsg) {
      assert(is_valid(cache_entry));
      if (use_seq_not_coal) {
        coalescer.recordCPWriteCallBack(machineID, in_msg.Sender);
        sequencer.writeCallback(address,
                                cache_entry.DataBlk,
                                false,
                                machineIDToMachineType(in_msg.Sender),
                                in_msg.InitialRequestTime,
                                in_msg.ForwardRequestTime,
                                in_msg.ProbeRequestStartTime);
      } else {
        MachineType cc_mach_type := getCoherenceType(machineID,
                                                     in_msg.Sender);
        coalescer.writeCallback(address,
                                cc_mach_type,
                                cache_entry.DataBlk,
                                in_msg.InitialRequestTime,
                                in_msg.ForwardRequestTime,
                                in_msg.ProbeRequestStartTime);
      }
      cache_entry.Dirty := true;
    }
  }

  action(w_writeCache, "w", desc="write data to cache") {
    peek(responseToTCP_in, ResponseMsg) {
      assert(is_valid(cache_entry));
      cache_entry.DataBlk := in_msg.DataBlk;
      cache_entry.Dirty := in_msg.Dirty;
    }
  }

  action(ss_sendStaleNotification, "ss", desc="stale data; nothing to writeback") {
    peek(responseToTCP_in, ResponseMsg) {
      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
        out_msg.addr := address;
        out_msg.Type := CoherenceResponseType:StaleNotif;
        out_msg.Sender := machineID;
        out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
                                TCC_select_low_bit, TCC_select_num_bits));
        out_msg.MessageSize := MessageSizeType:Response_Control;
        DPRINTF(RubySlicc, "%s\n", out_msg);
      }
    }
  }

  action(wb_data, "wb", desc="write back data") {
    peek(responseToTCP_in, ResponseMsg) {
      enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
        out_msg.addr := address;
        out_msg.Type := CoherenceResponseType:CPUData;
        out_msg.Sender := machineID;
        out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC,
                                TCC_select_low_bit, TCC_select_num_bits));
        out_msg.DataBlk := tbe.DataBlk;
        out_msg.Dirty := tbe.Dirty;
        if (tbe.Shared) {
          out_msg.NbReqShared := true;
        } else {
          out_msg.NbReqShared := false;
        }
        out_msg.State := CoherenceState:Shared;  // faux info
        out_msg.MessageSize := MessageSizeType:Writeback_Data;
        DPRINTF(RubySlicc, "%s\n", out_msg);
      }
    }
  }

  action(piu_sendProbeResponseInvUntransferredOwnership, "piu", desc="send probe ack inv, no data, retain ownership") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // TCC, L3 respond in same way to probes
      out_msg.Sender := machineID;
      // will this always be ok? probably not for multisocket
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.Dirty := false;
      out_msg.Hit := false;
      out_msg.Ntsl := true;
      out_msg.State := CoherenceState:NA;
      out_msg.UntransferredOwner := true;
      out_msg.MessageSize := MessageSizeType:Response_Control;
    }
  }

  action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // TCC, L3 respond in same way to probes
      out_msg.Sender := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.Dirty := false;
      out_msg.Hit := false;
      out_msg.Ntsl := true;
      out_msg.State := CoherenceState:NA;
      out_msg.MessageSize := MessageSizeType:Response_Control;
      out_msg.isValid := isValid(address);
    }
  }

  action(pim_sendProbeResponseInvMs, "pim", desc="send probe ack inv, no data") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and TCC respond in same way to probes
      out_msg.Sender := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.Dirty := false;
      out_msg.Ntsl := true;
      out_msg.Hit := false;
      out_msg.State := CoherenceState:NA;
      out_msg.MessageSize := MessageSizeType:Response_Control;
      out_msg.isValid := isValid(address);
    }
  }

  action(prm_sendProbeResponseMiss, "prm", desc="send probe ack PrbShrData, no data") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp;  // L3 and TCC respond in same way to probes
      out_msg.Sender := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.Dirty := false;  // only true if sending back data i think
      out_msg.Hit := false;
      out_msg.Ntsl := false;
      out_msg.State := CoherenceState:NA;
      out_msg.MessageSize := MessageSizeType:Response_Control;
      out_msg.isValid := isValid(address);
    }
  }

  action(pd_sendProbeResponseData, "pd", desc="send probe ack, with data") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      assert(is_valid(cache_entry) || is_valid(tbe));
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp;
      out_msg.Sender := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.DataBlk := getDataBlock(address);
      if (is_valid(tbe)) {
        out_msg.Dirty := tbe.Dirty;
      } else {
        out_msg.Dirty := cache_entry.Dirty;
      }
      out_msg.Hit := true;
      out_msg.State := CoherenceState:NA;
      out_msg.MessageSize := MessageSizeType:Response_Data;
      out_msg.isValid := isValid(address);
      APPEND_TRANSITION_COMMENT("Sending ack with dirty ");
      APPEND_TRANSITION_COMMENT(out_msg.Dirty);
    }
  }

  action(pdm_sendProbeResponseDataMs, "pdm", desc="send probe ack, with data") {
    enqueue(responseNetwork_out, ResponseMsg, issue_latency) {
      assert(is_valid(cache_entry) || is_valid(tbe));
      assert(is_valid(cache_entry));
      out_msg.addr := address;
      out_msg.Type := CoherenceResponseType:CPUPrbResp;
      out_msg.Sender := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.DataBlk := getDataBlock(address);
      if (is_valid(tbe)) {
        out_msg.Dirty := tbe.Dirty;
      } else {
        out_msg.Dirty := cache_entry.Dirty;
      }
      out_msg.Hit := true;
      out_msg.State := CoherenceState:NA;
      out_msg.MessageSize := MessageSizeType:Response_Data;
      out_msg.isValid := isValid(address);
      APPEND_TRANSITION_COMMENT("Sending ack with dirty ");
      APPEND_TRANSITION_COMMENT(out_msg.Dirty);
      DPRINTF(RubySlicc, "Data is %s\n", out_msg.DataBlk);
    }
  }

  action(sf_setSharedFlip, "sf", desc="hit by shared probe, status may be different") {
    assert(is_valid(tbe));
    tbe.Shared := true;
  }

  action(mru_updateMRU, "mru", desc="Touch block for replacement policy") {
    L1cache.setMRU(address);
  }

  action(uu_sendUnblock, "uu", desc="state changed, unblock") {
    enqueue(unblockNetwork_out, UnblockMsg, issue_latency) {
      out_msg.addr := address;
      out_msg.Sender := machineID;
      out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir,
                              TCC_select_low_bit, TCC_select_num_bits));
      out_msg.MessageSize := MessageSizeType:Unblock_Control;
      out_msg.wasValid := isValid(address);
      DPRINTF(RubySlicc, "%s\n", out_msg);
    }
  }

  action(yy_recycleProbeQueue, "yy", desc="recycle probe queue") {
    probeNetwork_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
  }

  action(zz_recycleMandatoryQueue, "\z", desc="recycle mandatory queue") {
    mandatoryQueue_in.recycle(clockEdge(), cyclesToTicks(recycle_latency));
  }

  // Transitions

  // transitions from base
  transition(I, Load, I_ES) {TagArrayRead} {
    a_allocate;
    n_issueRdBlk;
    p_popMandatoryQueue;
  }

  transition(I, Store, I_M) {TagArrayRead, TagArrayWrite} {
    a_allocate;
    nM_issueRdBlkM;
    p_popMandatoryQueue;
  }

  transition(S, Store, S_M) {TagArrayRead} {
    mru_updateMRU;
    nM_issueRdBlkM;
    p_popMandatoryQueue;
  }

  transition(E, Store, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
    mru_updateMRU;
    s_storeDone;
    p_popMandatoryQueue;
  }

  transition(O, Store, O_M) {TagArrayRead, DataArrayWrite} {
    mru_updateMRU;
    nM_issueRdBlkM;
    p_popMandatoryQueue;
  }

  transition(M, Store) {TagArrayRead, DataArrayWrite} {
    mru_updateMRU;
    s_storeDone;
    p_popMandatoryQueue;
  }

  // simple hit transitions
  transition({S, E, O, M}, Load) {TagArrayRead, DataArrayRead} {
    l_loadDone;
    mru_updateMRU;
    p_popMandatoryQueue;
  }

  // recycles from transients
  transition({I_M, I_ES, ES_I, MO_I, S_M, O_M, MO_PI, I_C}, {Load, Store, Repl}) {} {
    zz_recycleMandatoryQueue;
  }

  transition({S, E}, Repl, ES_I) {TagArrayRead} {
    t_allocateTBE;
    vc_victim;
    ic_invCache;
  }

  transition({O, M}, Repl, MO_I) {TagArrayRead, DataArrayRead} {
    t_allocateTBE;
    vd_victim;
    ic_invCache;
  }

  // TD event transitions
  transition(I_M, {TCC_AckM, TCC_AckCtoD}, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
    w_writeCache;
    xs_storeDone;
    uu_sendUnblock;
    pr_popResponseQueue;
  }

  transition(I_ES, TCC_AckS, S) {TagArrayWrite, DataArrayWrite} {
    w_writeCache;
    xl_loadDone;
    uu_sendUnblock;
    pr_popResponseQueue;
  }

  transition(I_ES, TCC_AckE, E) {TagArrayWrite, DataArrayWrite} {
    w_writeCache;
    xl_loadDone;
    uu_sendUnblock;
    pr_popResponseQueue;
  }

  transition({S_M, O_M}, TCC_AckM, M) {TagArrayWrite, DataArrayWrite} {
    xs_storeDone;
    uu_sendUnblock;
    pr_popResponseQueue;
  }

  transition({MO_I, ES_I}, TCC_NackWB, I) {TagArrayWrite} {
    d_deallocateTBE;
    pr_popResponseQueue;
  }

  transition({MO_I, ES_I}, TCC_AckWB, I) {TagArrayWrite, DataArrayRead} {
    wb_data;
    d_deallocateTBE;
    pr_popResponseQueue;
  }

  transition(I_C, TCC_AckWB, I) {TagArrayWrite} {
    ss_sendStaleNotification;
    d_deallocateTBE;
    pr_popResponseQueue;
  }

  transition(I_C, TCC_NackWB, I) {TagArrayWrite} {
    d_deallocateTBE;
    pr_popResponseQueue;
  }

  // Probe transitions
  transition({M, O}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
    pd_sendProbeResponseData;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(I, PrbInvData) {TagArrayRead, TagArrayWrite} {
    prm_sendProbeResponseMiss;
    pp_popProbeQueue;
  }

  transition({E, S}, PrbInvData, I) {TagArrayRead, TagArrayWrite} {
    pd_sendProbeResponseData;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(I_C, PrbInvData, I_C) {} {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  // Needed for TCC-based protocols. Must hold on to ownership till transfer complete
  transition({M, O}, LocalPrbInv, MO_PI) {TagArrayRead, TagArrayWrite} {
    piu_sendProbeResponseInvUntransferredOwnership;
    pp_popProbeQueue;
  }

  // If there is a race and we see a probe invalidate, handle normally.
  transition(MO_PI, PrbInvData, I) {TagArrayWrite} {
    pd_sendProbeResponseData;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(MO_PI, PrbInv, I) {TagArrayWrite} {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  // normal exit when ownership is successfully transferred
  transition(MO_PI, TCC_AckCtoD, I) {TagArrayWrite} {
    ic_invCache;
    pr_popResponseQueue;
  }

  transition({M, O, E, S, I}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition({E, S, I}, LocalPrbInv, I) {TagArrayRead, TagArrayWrite} {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition({M, E, O}, PrbShrData, O) {TagArrayRead, TagArrayWrite, DataArrayRead} {
    pd_sendProbeResponseData;
    pp_popProbeQueue;
  }

  transition(MO_PI, PrbShrData) {DataArrayRead} {
    pd_sendProbeResponseData;
    pp_popProbeQueue;
  }

  transition(S, PrbShrData, S) {TagArrayRead, DataArrayRead} {
    pd_sendProbeResponseData;
    pp_popProbeQueue;
  }

  transition({I, I_C}, PrbShrData) {TagArrayRead} {
    prm_sendProbeResponseMiss;
    pp_popProbeQueue;
  }

  transition(I_C, PrbInv, I_C) {} {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition({I_M, I_ES}, {PrbInv, PrbInvData}) {TagArrayRead} {
    pi_sendProbeResponseInv;
    ic_invCache;
    a_allocate;  // but make sure there is room for incoming data when it arrives
    pp_popProbeQueue;
  }

  transition({I_M, I_ES}, PrbShrData) {} {
    prm_sendProbeResponseMiss;
    pp_popProbeQueue;
  }

  transition(S_M, PrbInvData, I_M) {TagArrayRead} {
    pim_sendProbeResponseInvMs;
    ic_invCache;
    a_allocate;
    pp_popProbeQueue;
  }

  transition(O_M, PrbInvData, I_M) {TagArrayRead, DataArrayRead} {
    pdm_sendProbeResponseDataMs;
    ic_invCache;
    a_allocate;
    pp_popProbeQueue;
  }

  transition({S_M, O_M}, {PrbInv}, I_M) {TagArrayRead} {
    pim_sendProbeResponseInvMs;
    ic_invCache;
    a_allocate;
    pp_popProbeQueue;
  }

  transition(S_M, {LocalPrbInv}, I_M) {TagArrayRead} {
    pim_sendProbeResponseInvMs;
    ic_invCache;
    a_allocate;
    pp_popProbeQueue;
  }

  transition(O_M, LocalPrbInv, I_M) {TagArrayRead} {
    piu_sendProbeResponseInvUntransferredOwnership;
    ic_invCache;
    a_allocate;
    pp_popProbeQueue;
  }

  transition({S_M, O_M}, PrbShrData) {DataArrayRead} {
    pd_sendProbeResponseData;
    pp_popProbeQueue;
  }

  transition(ES_I, PrbInvData, I_C) {
    pd_sendProbeResponseData;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(MO_I, PrbInvData, I_C) {DataArrayRead} {
    pd_sendProbeResponseData;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(MO_I, PrbInv, I_C) {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(ES_I, PrbInv, I_C) {
    pi_sendProbeResponseInv;
    ic_invCache;
    pp_popProbeQueue;
  }

  transition(ES_I, PrbShrData, ES_I) {DataArrayRead} {
    pd_sendProbeResponseData;
    sf_setSharedFlip;
    pp_popProbeQueue;
  }

  transition(MO_I, PrbShrData, MO_I) {DataArrayRead} {
    pd_sendProbeResponseData;
    sf_setSharedFlip;
    pp_popProbeQueue;
  }

}