/*
 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Sooraj Puthoor
 */

#include "base/logging.hh"
#include "base/str.hh"
#include "config/the_isa.hh"

#if THE_ISA == X86_ISA
#include "arch/x86/insts/microldstop.hh"
#endif // X86_ISA

#include "mem/ruby/system/VIPERCoalescer.hh"

#include "cpu/testers/rubytest/RubyTester.hh"
#include "debug/GPUCoalescer.hh"
#include "debug/MemoryAccess.hh"
#include "mem/packet.hh"
#include "mem/ruby/common/SubBlock.hh"
#include "mem/ruby/network/MessageBuffer.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/slicc_interface/RubyRequest.hh"
#include "mem/ruby/structures/CacheMemory.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
#include "mem/ruby/system/RubySystem.hh"
#include "params/VIPERCoalescer.hh"

using namespace std;

VIPERCoalescer *
VIPERCoalescerParams::create()
{
    return new VIPERCoalescer(this);
}

VIPERCoalescer::VIPERCoalescer(const Params *p)
    : GPUCoalescer(p)
{
    m_max_wb_per_cycle = p->max_wb_per_cycle;
    m_max_inv_per_cycle = p->max_inv_per_cycle;
    m_outstanding_inv = 0;
    m_outstanding_wb = 0;
}

VIPERCoalescer::~VIPERCoalescer()
{
}

// Analyzes the packet to see if this request can be coalesced.
// If the request can be coalesced, it is added to the reqCoalescer table
// and makeRequest returns RequestStatus_Issued.
// If this is the first request to a cache line, the request is added to both
// the newRequests queue and the reqCoalescer table; makeRequest
// returns RequestStatus_Issued.
// If there is a pending request to this cache line and this request
// can't be coalesced, RequestStatus_Aliased is returned and
// the packet needs to be reissued.
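//
// Beyond coalescing, makeRequest also implements the acquire/release and
// kernel begin/end semantics by walking the L1 data cache (summarizing the
// handling implemented below):
//   - Kernel Begin (isKernel + isAcquire): invalidate the L1 (invL1()).
//   - Kernel End (isKernel + isRelease): write back dirty L1 data (wbL1()).
//   - LoadAcq / AtomicAcq (isAcquire): invalidate the L1 (invL1()).
//   - StoreRel (isRelease): write back dirty L1 data (wbL1()).
//   - AtomicAcqRel (isAcquire + isRelease): both (invwbL1()).
// While writebacks are outstanding, issueEvent is descheduled; once they
// drain, wbCallback resumes issue via completeIssue() and retries are sent.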
RequestStatus
VIPERCoalescer::makeRequest(PacketPtr pkt)
{
    if (m_outstanding_wb | m_outstanding_inv) {
        DPRINTF(GPUCoalescer,
                "There are %d Writebacks and %d Invalidations\n",
                m_outstanding_wb, m_outstanding_inv);
    }
    // Are we in the middle of a release?
    if ((m_outstanding_wb) > 0) {
        if (pkt->req->isKernel()) {
            // Everything is fine:
            // barriers and Kernel End can coalesce.
            // If it is a Kernel Begin, flush the cache.
            if (pkt->req->isAcquire() && (m_outstanding_inv == 0)) {
                invL1();
            }

            if (pkt->req->isRelease()) {
                insertKernel(pkt->req->contextId(), pkt);
            }

            return RequestStatus_Issued;
        }
        // return RequestStatus_Aliased;
    } else if (pkt->req->isKernel() && pkt->req->isRelease()) {
        // Flush dirty data on Kernel End
        // (isKernel + isRelease)
        insertKernel(pkt->req->contextId(), pkt);
        wbL1();
        if (m_outstanding_wb == 0) {
            for (auto it = kernelEndList.begin(); it != kernelEndList.end();
                 it++) {
                newKernelEnds.push_back(it->first);
            }
            completeIssue();
        }
        return RequestStatus_Issued;
    }
    RequestStatus requestStatus = GPUCoalescer::makeRequest(pkt);
    if (requestStatus != RequestStatus_Issued) {
        // Request not issued; enqueue a retry
        DPRINTF(GPUCoalescer, "Request not issued by GPUCoalescer\n");
        return requestStatus;
    } else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
        // Invalidate clean data on Kernel Begin
        // (isKernel + isAcquire)
        invL1();
    } else if (pkt->req->isAcquire() && pkt->req->isRelease()) {
        // Deschedule the AtomicAcqRel and
        // flush and invalidate the L1 cache
        invwbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isRelease()) {
        // Deschedule the StoreRel and
        // flush the L1 cache
        wbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isAcquire()) {
        // LoadAcq or AtomicAcq:
        // invalidate the L1 cache
        invL1();
    }
    // Request was successful
    if (m_outstanding_wb == 0) {
        if (!issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Rescheduled\n");
            schedule(issueEvent, curTick());
        }
    }
    return RequestStatus_Issued;
}

// Called when the writeback (flush) of one cache block has completed.
void
VIPERCoalescer::wbCallback(Addr addr)
{
    m_outstanding_wb--;
    // If the L1 flush is complete,
    // attempt to schedule issueEvent
    assert(((int) m_outstanding_wb) >= 0);
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end();
             it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}

// Called when the invalidation of one cache block has completed.
void
VIPERCoalescer::invCallback(Addr addr)
{
    m_outstanding_inv--;
    // If the L1 flush is complete,
    // attempt to schedule issueEvent.
    // This probably won't happen, since
    // we don't wait on cache invalidations.
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end();
             it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}

/**
 * Invalidate L1 cache (Acquire)
 */
void
VIPERCoalescer::invL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding before Cache Walk\n",
            m_outstanding_inv);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict read-only data
        RubyRequestType request_type = RubyRequestType_REPLACEMENT;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_inv++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding after Cache Walk\n",
            m_outstanding_inv);
}

/**
 * Writeback L1 cache (Release)
 */
void
VIPERCoalescer::wbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding before Cache Walk\n",
            m_outstanding_wb);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        RubyRequestType request_type = RubyRequestType_FLUSH;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_wb++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding after Cache Walk\n",
            m_outstanding_wb);
}

/**
 * Invalidate and Writeback L1 cache (Acquire&Release)
 */
void
VIPERCoalescer::invwbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    // Walk the cache, invalidating clean (read-only) data
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict read-only data
        RubyRequestType request_type = RubyRequestType_REPLACEMENT;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_inv++;
    }
    // Walk the cache again, writing dirty data back
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        RubyRequestType request_type = RubyRequestType_FLUSH;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_wb++;
    }
}