// VIPERCoalescer.cc, revision 11308
/*
 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
329243SN/A * 339243SN/A * Author: Sooraj Puthoor 349243SN/A */ 359243SN/A 369243SN/A#include "base/misc.hh" 379243SN/A#include "base/str.hh" 389243SN/A#include "config/the_isa.hh" 399243SN/A 409243SN/A#if THE_ISA == X86_ISA 419243SN/A#include "arch/x86/insts/microldstop.hh" 429243SN/A 4310864Sjungma@eit.uni-kl.de#endif // X86_ISA 4410864Sjungma@eit.uni-kl.de#include "mem/ruby/system/VIPERCoalescer.hh" 4511186Serfan.azarkhish@unibo.it 469243SN/A#include "cpu/testers/rubytest/RubyTester.hh" 479243SN/A#include "debug/GPUCoalescer.hh" 489243SN/A#include "debug/MemoryAccess.hh" 499243SN/A#include "mem/packet.hh" 509243SN/A#include "mem/ruby/common/SubBlock.hh" 519243SN/A#include "mem/ruby/network/MessageBuffer.hh" 529243SN/A#include "mem/ruby/profiler/Profiler.hh" 539243SN/A#include "mem/ruby/slicc_interface/AbstractController.hh" 5410136SN/A#include "mem/ruby/slicc_interface/RubyRequest.hh" 5510136SN/A#include "mem/ruby/structures/CacheMemory.hh" 5610136SN/A#include "mem/ruby/system/GPUCoalescer.hh" 5710136SN/A#include "mem/ruby/system/RubySystem.hh" 5810136SN/A#include "params/VIPERCoalescer.hh" 5910136SN/A 6010136SN/Ausing namespace std; 619243SN/A 6210144SN/AVIPERCoalescer * 6310144SN/AVIPERCoalescerParams::create() 6410144SN/A{ 6510144SN/A return new VIPERCoalescer(this); 669243SN/A} 6710146Sandreas.hansson@arm.com 689243SN/AVIPERCoalescer::VIPERCoalescer(const Params *p) 699243SN/A : GPUCoalescer(p) 709243SN/A{ 7110146Sandreas.hansson@arm.com m_max_wb_per_cycle=p->max_wb_per_cycle; 7210146Sandreas.hansson@arm.com m_max_inv_per_cycle=p->max_inv_per_cycle; 7310146Sandreas.hansson@arm.com m_outstanding_inv = 0; 749243SN/A m_outstanding_wb = 0; 759243SN/A} 769243SN/A 779243SN/AVIPERCoalescer::~VIPERCoalescer() 789243SN/A{ 7910536Sandreas.hansson@arm.com} 8010536Sandreas.hansson@arm.com 8110536Sandreas.hansson@arm.com// Analyzes the packet to see if this request can be coalesced. 
8210536Sandreas.hansson@arm.com// If request can be coalesced, this request is added to the reqCoalescer table 8310145SN/A// and makeRequest returns RequestStatus_Issued; 849972SN/A// If this is the first request to a cacheline, request is added to both 859243SN/A// newRequests queue and to the reqCoalescer table; makeRequest 8610140SN/A// returns RequestStatus_Issued. 8710140SN/A// If there is a pending request to this cacheline and this request 8810140SN/A// can't be coalesced, RequestStatus_Aliased is returned and 899972SN/A// the packet needs to be reissued. 9010140SN/ARequestStatus 9110140SN/AVIPERCoalescer::makeRequest(PacketPtr pkt) 9210140SN/A{ 9310140SN/A if (m_outstanding_wb | m_outstanding_inv) { 9410140SN/A DPRINTF(GPUCoalescer, 9510140SN/A "There are %d Writebacks and %d Invalidatons\n", 9610140SN/A m_outstanding_wb, m_outstanding_inv); 979243SN/A } 989243SN/A // Are we in the middle of a release 999489SN/A if ((m_outstanding_wb) > 0) { 10010675Sandreas.hansson@arm.com if (pkt->req->isKernel()) { 10110145SN/A // Everythign is fine 1029243SN/A // Barriers and Kernel End scan coalesce 10310141SN/A // If it is a Kerenl Begin flush the cache 10410141SN/A if (pkt->req->isAcquire() && (m_outstanding_inv == 0)) { 10510141SN/A invL1(); 10610141SN/A } 10710489SOmar.Naji@arm.com 10810489SOmar.Naji@arm.com if (pkt->req->isRelease()) { 10910489SOmar.Naji@arm.com insertKernel(pkt->req->contextId(), pkt); 1109726SN/A } 1119726SN/A 1129726SN/A return RequestStatus_Issued; 1139726SN/A } 1149726SN/A// return RequestStatus_Aliased; 1159726SN/A } else if (pkt->req->isKernel() && pkt->req->isRelease()) { 1169726SN/A // Flush Dirty Data on Kernel End 1179489SN/A // isKernel + isRelease 1189831SN/A insertKernel(pkt->req->contextId(), pkt); 1199831SN/A wbL1(); 1209831SN/A if(m_outstanding_wb == 0) { 1219831SN/A for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) { 1229831SN/A newKernelEnds.push_back(it->first); 1239831SN/A } 1249489SN/A completeIssue(); 
12510394Swendy.elsasser@arm.com } 12610394Swendy.elsasser@arm.com return RequestStatus_Issued; 12710394Swendy.elsasser@arm.com } 12810394Swendy.elsasser@arm.com RequestStatus requestStatus = GPUCoalescer::makeRequest(pkt); 12910394Swendy.elsasser@arm.com if (requestStatus!=RequestStatus_Issued) { 1309489SN/A // Request not isssued 1319566SN/A // enqueue Retry 1329566SN/A DPRINTF(GPUCoalescer, "Request not issued by GPUCoaleser\n"); 1339566SN/A return requestStatus; 1349566SN/A } else if (pkt->req->isKernel() && pkt->req->isAcquire()) { 1359489SN/A // Invalidate clean Data on Kernel Begin 13610430SOmar.Naji@arm.com // isKernel + isAcquire 13710430SOmar.Naji@arm.com invL1(); 13810430SOmar.Naji@arm.com } else if (pkt->req->isAcquire() && pkt->req->isRelease()) { 13910430SOmar.Naji@arm.com // Deschedule the AtomicAcqRel and 14010430SOmar.Naji@arm.com // Flush and Invalidate the L1 cache 14110430SOmar.Naji@arm.com invwbL1(); 14210430SOmar.Naji@arm.com if (m_outstanding_wb > 0 && issueEvent.scheduled()) { 14310430SOmar.Naji@arm.com DPRINTF(GPUCoalescer, "issueEvent Descheduled\n"); 14410430SOmar.Naji@arm.com deschedule(issueEvent); 1459243SN/A } 1469243SN/A } else if (pkt->req->isRelease()) { 14710216Sandreas.hansson@arm.com // Deschedule the StoreRel and 14810216Sandreas.hansson@arm.com // Flush the L1 cache 14910216Sandreas.hansson@arm.com wbL1(); 1509243SN/A if (m_outstanding_wb > 0 && issueEvent.scheduled()) { 1519243SN/A DPRINTF(GPUCoalescer, "issueEvent Descheduled\n"); 1529489SN/A deschedule(issueEvent); 1539243SN/A } 1549243SN/A } else if (pkt->req->isAcquire()) { 1559489SN/A // LoadAcq or AtomicAcq 1569243SN/A // Invalidate the L1 cache 1579243SN/A invL1(); 1589489SN/A } 1599243SN/A // Request was successful 1609963SN/A if (m_outstanding_wb == 0) { 1619963SN/A if (!issueEvent.scheduled()) { 1629963SN/A DPRINTF(GPUCoalescer, "issueEvent Rescheduled\n"); 16310210Sandreas.hansson@arm.com schedule(issueEvent, curTick()); 16410210Sandreas.hansson@arm.com } 
16510210Sandreas.hansson@arm.com } 16610212Sandreas.hansson@arm.com return RequestStatus_Issued; 16710212Sandreas.hansson@arm.com} 16810212Sandreas.hansson@arm.com 1699243SN/Avoid 1709243SN/AVIPERCoalescer::wbCallback(Addr addr) 1719243SN/A{ 1729831SN/A m_outstanding_wb--; 1739831SN/A // if L1 Flush Complete 17410146Sandreas.hansson@arm.com // attemnpt to schedule issueEvent 17510394Swendy.elsasser@arm.com assert(((int) m_outstanding_wb) >= 0); 17610394Swendy.elsasser@arm.com if (m_outstanding_wb == 0) { 17710394Swendy.elsasser@arm.com for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) { 1789489SN/A newKernelEnds.push_back(it->first); 1799243SN/A } 18010394Swendy.elsasser@arm.com completeIssue(); 18110394Swendy.elsasser@arm.com } 18210394Swendy.elsasser@arm.com trySendRetries(); 18310394Swendy.elsasser@arm.com} 18410394Swendy.elsasser@arm.com 18510394Swendy.elsasser@arm.comvoid 1869243SN/AVIPERCoalescer::invCallback(Addr addr) 1879489SN/A{ 1889243SN/A m_outstanding_inv--; 1899243SN/A // if L1 Flush Complete 1909243SN/A // attemnpt to schedule issueEvent 1919489SN/A // This probably won't happen, since 1929243SN/A // we dont wait on cache invalidations 19310393Swendy.elsasser@arm.com if (m_outstanding_wb == 0) { 19410393Swendy.elsasser@arm.com for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) { 1959243SN/A newKernelEnds.push_back(it->first); 19610393Swendy.elsasser@arm.com } 19710393Swendy.elsasser@arm.com completeIssue(); 19810393Swendy.elsasser@arm.com } 19910393Swendy.elsasser@arm.com trySendRetries(); 20010393Swendy.elsasser@arm.com} 20110393Swendy.elsasser@arm.com 20210393Swendy.elsasser@arm.com/** 20310393Swendy.elsasser@arm.com * Invalidate L1 cache (Acquire) 20410206Sandreas.hansson@arm.com */ 2059971SN/Avoid 2069971SN/AVIPERCoalescer::invL1() 2079971SN/A{ 20810394Swendy.elsasser@arm.com int size = m_dataCache_ptr->getNumBlocks(); 20910394Swendy.elsasser@arm.com DPRINTF(GPUCoalescer, 21010394Swendy.elsasser@arm.com 
"There are %d Invalidations outstanding before Cache Walk\n", 2119488SN/A m_outstanding_inv); 2129488SN/A // Walk the cache 2139489SN/A for (int i = 0; i < size; i++) { 2149489SN/A Addr addr = m_dataCache_ptr->getAddressAtIdx(i); 2159488SN/A // Evict Read-only data 21610430SOmar.Naji@arm.com std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>( 21710430SOmar.Naji@arm.com clockEdge(), addr, (uint8_t*) 0, 0, 0, 21810430SOmar.Naji@arm.com RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor, 21910430SOmar.Naji@arm.com nullptr); 22010430SOmar.Naji@arm.com assert(m_mandatory_q_ptr != NULL); 22110430SOmar.Naji@arm.com m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency); 22210430SOmar.Naji@arm.com m_outstanding_inv++; 22310430SOmar.Naji@arm.com } 22410430SOmar.Naji@arm.com DPRINTF(GPUCoalescer, 22510430SOmar.Naji@arm.com "There are %d Invalidatons outstanding after Cache Walk\n", 22610430SOmar.Naji@arm.com m_outstanding_inv); 22710430SOmar.Naji@arm.com} 22810430SOmar.Naji@arm.com 2299488SN/A/** 2309243SN/A * Writeback L1 cache (Release) 2319243SN/A */ 2329963SN/Avoid 2339243SN/AVIPERCoalescer::wbL1() 23410430SOmar.Naji@arm.com{ 23510430SOmar.Naji@arm.com int size = m_dataCache_ptr->getNumBlocks(); 23610430SOmar.Naji@arm.com DPRINTF(GPUCoalescer, 23710430SOmar.Naji@arm.com "There are %d Writebacks outstanding before Cache Walk\n", 23810430SOmar.Naji@arm.com m_outstanding_wb); 23910430SOmar.Naji@arm.com // Walk the cache 24010430SOmar.Naji@arm.com for (int i = 0; i < size; i++) { 24110430SOmar.Naji@arm.com Addr addr = m_dataCache_ptr->getAddressAtIdx(i); 24210430SOmar.Naji@arm.com // Write dirty data back 24310430SOmar.Naji@arm.com std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>( 24410430SOmar.Naji@arm.com clockEdge(), addr, (uint8_t*) 0, 0, 0, 24510430SOmar.Naji@arm.com RubyRequestType_FLUSH, RubyAccessMode_Supervisor, 24610430SOmar.Naji@arm.com nullptr); 24710430SOmar.Naji@arm.com assert(m_mandatory_q_ptr != NULL); 
24810430SOmar.Naji@arm.com m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency); 24910430SOmar.Naji@arm.com m_outstanding_wb++; 25010430SOmar.Naji@arm.com } 25110430SOmar.Naji@arm.com DPRINTF(GPUCoalescer, 25210430SOmar.Naji@arm.com "There are %d Writebacks outstanding after Cache Walk\n", 25310430SOmar.Naji@arm.com m_outstanding_wb); 25410430SOmar.Naji@arm.com} 25510430SOmar.Naji@arm.com 25610430SOmar.Naji@arm.com/** 25710430SOmar.Naji@arm.com * Invalidate and Writeback L1 cache (Acquire&Release) 25810430SOmar.Naji@arm.com */ 25910430SOmar.Naji@arm.comvoid 26010430SOmar.Naji@arm.comVIPERCoalescer::invwbL1() 26110430SOmar.Naji@arm.com{ 26210430SOmar.Naji@arm.com int size = m_dataCache_ptr->getNumBlocks(); 26310430SOmar.Naji@arm.com // Walk the cache 26410430SOmar.Naji@arm.com for(int i = 0; i < size; i++) { 26510430SOmar.Naji@arm.com Addr addr = m_dataCache_ptr->getAddressAtIdx(i); 26610430SOmar.Naji@arm.com // Evict Read-only data 26710430SOmar.Naji@arm.com std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>( 26810430SOmar.Naji@arm.com clockEdge(), addr, (uint8_t*) 0, 0, 0, 26910430SOmar.Naji@arm.com RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor, 27010430SOmar.Naji@arm.com nullptr); 27110430SOmar.Naji@arm.com assert(m_mandatory_q_ptr != NULL); 27210430SOmar.Naji@arm.com m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency); 27310430SOmar.Naji@arm.com m_outstanding_inv++; 27410430SOmar.Naji@arm.com } 27510430SOmar.Naji@arm.com // Walk the cache 27610430SOmar.Naji@arm.com for(int i = 0; i< size; i++) { 27710430SOmar.Naji@arm.com Addr addr = m_dataCache_ptr->getAddressAtIdx(i); 27810430SOmar.Naji@arm.com // Write dirty data back 27910430SOmar.Naji@arm.com std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>( 28010430SOmar.Naji@arm.com clockEdge(), addr, (uint8_t*) 0, 0, 0, 28110430SOmar.Naji@arm.com RubyRequestType_FLUSH, RubyAccessMode_Supervisor, 28210430SOmar.Naji@arm.com nullptr); 
28310430SOmar.Naji@arm.com assert(m_mandatory_q_ptr != NULL); 28410430SOmar.Naji@arm.com m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency); 28510430SOmar.Naji@arm.com m_outstanding_wb++; 28610430SOmar.Naji@arm.com } 28710430SOmar.Naji@arm.com} 28810430SOmar.Naji@arm.com