VIPERCoalescer.cc (11321:02e930db812d) VIPERCoalescer.cc (12334:e0ab29a34764)
/*
 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Sooraj Puthoor
 */

36#include "base/misc.hh"
36#include "base/logging.hh"
37#include "base/str.hh"
38#include "config/the_isa.hh"
39
40#if THE_ISA == X86_ISA
41#include "arch/x86/insts/microldstop.hh"
42
43#endif // X86_ISA
44#include "mem/ruby/system/VIPERCoalescer.hh"
45
46#include "cpu/testers/rubytest/RubyTester.hh"
47#include "debug/GPUCoalescer.hh"
48#include "debug/MemoryAccess.hh"
49#include "mem/packet.hh"
50#include "mem/ruby/common/SubBlock.hh"
51#include "mem/ruby/network/MessageBuffer.hh"
52#include "mem/ruby/profiler/Profiler.hh"
53#include "mem/ruby/slicc_interface/AbstractController.hh"
54#include "mem/ruby/slicc_interface/RubyRequest.hh"
55#include "mem/ruby/structures/CacheMemory.hh"
56#include "mem/ruby/system/GPUCoalescer.hh"
57#include "mem/ruby/system/RubySystem.hh"
58#include "params/VIPERCoalescer.hh"
59
using namespace std;

VIPERCoalescer *
VIPERCoalescerParams::create()
{
    return new VIPERCoalescer(this);
}

VIPERCoalescer::VIPERCoalescer(const Params *p)
    : GPUCoalescer(p)
{
    m_max_wb_per_cycle = p->max_wb_per_cycle;
    m_max_inv_per_cycle = p->max_inv_per_cycle;
    m_outstanding_inv = 0;
    m_outstanding_wb = 0;
}

VIPERCoalescer::~VIPERCoalescer()
{
}

// Analyzes the packet to see if this request can be coalesced.
// If the request can be coalesced, it is added to the reqCoalescer table
// and makeRequest returns RequestStatus_Issued.
// If this is the first request to a cacheline, the request is added to both
// the newRequests queue and the reqCoalescer table; makeRequest
// returns RequestStatus_Issued.
// If there is a pending request to this cacheline and this request
// can't be coalesced, RequestStatus_Aliased is returned and
// the packet needs to be reissued.
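// In addition to coalescing, makeRequest implements the acquire/release
// behavior of the VIPER protocol as seen in the code below: acquire
// requests invalidate the L1 (invL1), release requests write dirty L1
// data back (wbL1), and acquire-release atomics do both (invwbL1).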
RequestStatus
VIPERCoalescer::makeRequest(PacketPtr pkt)
{
    if (m_outstanding_wb | m_outstanding_inv) {
        DPRINTF(GPUCoalescer,
                "There are %d Writebacks and %d Invalidations\n",
                m_outstanding_wb, m_outstanding_inv);
    }
    // Are we in the middle of a release?
    if ((m_outstanding_wb) > 0) {
        if (pkt->req->isKernel()) {
            // Everything is fine.
            // Barriers and Kernel End can coalesce.
            // If it is a Kernel Begin, flush the cache.
            if (pkt->req->isAcquire() && (m_outstanding_inv == 0)) {
                invL1();
            }

            if (pkt->req->isRelease()) {
                insertKernel(pkt->req->contextId(), pkt);
            }

            return RequestStatus_Issued;
        }
//        return RequestStatus_Aliased;
    } else if (pkt->req->isKernel() && pkt->req->isRelease()) {
        // Flush dirty data on Kernel End
        // isKernel + isRelease
        insertKernel(pkt->req->contextId(), pkt);
        wbL1();
        if (m_outstanding_wb == 0) {
            for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
                newKernelEnds.push_back(it->first);
            }
            completeIssue();
        }
        return RequestStatus_Issued;
    }
    RequestStatus requestStatus = GPUCoalescer::makeRequest(pkt);
    if (requestStatus != RequestStatus_Issued) {
        // Request not issued
        // enqueue Retry
        DPRINTF(GPUCoalescer, "Request not issued by GPUCoalescer\n");
        return requestStatus;
    } else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
        // Invalidate clean data on Kernel Begin
        // isKernel + isAcquire
        invL1();
    } else if (pkt->req->isAcquire() && pkt->req->isRelease()) {
        // Deschedule the AtomicAcqRel and
        // flush and invalidate the L1 cache
        invwbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isRelease()) {
        // Deschedule the StoreRel and
        // flush the L1 cache
        wbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isAcquire()) {
        // LoadAcq or AtomicAcq
        // Invalidate the L1 cache
        invL1();
    }
    // Request was successful
    if (m_outstanding_wb == 0) {
        if (!issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Rescheduled\n");
            schedule(issueEvent, curTick());
        }
    }
    return RequestStatus_Issued;
}

void
VIPERCoalescer::wbCallback(Addr addr)
{
    m_outstanding_wb--;
    // If the L1 flush is complete,
    // attempt to schedule the issueEvent
    assert(((int) m_outstanding_wb) >= 0);
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}

void
VIPERCoalescer::invCallback(Addr addr)
{
    m_outstanding_inv--;
    // If the L1 flush is complete,
    // attempt to schedule the issueEvent.
    // This probably won't happen, since
    // we don't wait on cache invalidations.
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}

/**
 * Invalidate L1 cache (Acquire)
 */
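// Note: a REPLACEMENT request is enqueued on the mandatory queue for each
// block index returned by the cache walk; each request is counted in
// m_outstanding_inv and acknowledged through invCallback().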
void
VIPERCoalescer::invL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding before Cache Walk\n",
            m_outstanding_inv);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict read-only data
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
        m_outstanding_inv++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding after Cache Walk\n",
            m_outstanding_inv);
}

/**
 * Writeback L1 cache (Release)
 */
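// Note: a FLUSH request is enqueued on the mandatory queue for each block
// index in the L1 data cache; completions are counted down in wbCallback(),
// which calls completeIssue() for queued kernel-end packets once
// m_outstanding_wb reaches zero.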
void
VIPERCoalescer::wbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding before Cache Walk\n",
            m_outstanding_wb);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            RubyRequestType_FLUSH, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
        m_outstanding_wb++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding after Cache Walk\n",
            m_outstanding_wb);
}

/**
 * Invalidate and Writeback L1 cache (Acquire&Release)
 */
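// Note: the cache is walked twice. The first pass enqueues REPLACEMENT
// requests to drop clean (read-only) blocks, and the second pass enqueues
// FLUSH requests to write dirty blocks back, mirroring invL1() and wbL1().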
void
VIPERCoalescer::invwbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict read-only data
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
        m_outstanding_inv++;
    }
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            RubyRequestType_FLUSH, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
        m_outstanding_wb++;
    }
}