GPUCoalescer.cc revision 11308
/*
 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Sooraj Puthoor
 */

#include "base/misc.hh"
#include "base/str.hh"
#include "config/the_isa.hh"

#if THE_ISA == X86_ISA
#include "arch/x86/insts/microldstop.hh"

#endif // X86_ISA
#include "mem/ruby/system/GPUCoalescer.hh"

#include "cpu/testers/rubytest/RubyTester.hh"
#include "debug/GPUCoalescer.hh"
#include "debug/MemoryAccess.hh"
#include "debug/ProtocolTrace.hh"
#include "debug/RubyPort.hh"
#include "debug/RubyStats.hh"
#include "gpu-compute/shader.hh"
#include "mem/packet.hh"
#include "mem/ruby/common/DataBlock.hh"
#include "mem/ruby/common/SubBlock.hh"
#include "mem/ruby/network/MessageBuffer.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/slicc_interface/RubyRequest.hh"
#include "mem/ruby/structures/CacheMemory.hh"
#include "mem/ruby/system/RubySystem.hh"
#include "params/RubyGPUCoalescer.hh"

using namespace std;

GPUCoalescer *
RubyGPUCoalescerParams::create()
{
    return new GPUCoalescer(this);
}

HSAScope
reqScopeToHSAScope(Request* req)
{
    HSAScope accessScope = HSAScope_UNSPECIFIED;
    if (req->isScoped()) {
        if (req->isWavefrontScope()) {
            accessScope = HSAScope_WAVEFRONT;
        } else if (req->isWorkgroupScope()) {
            accessScope = HSAScope_WORKGROUP;
        } else if (req->isDeviceScope()) {
            accessScope = HSAScope_DEVICE;
        } else if (req->isSystemScope()) {
            accessScope = HSAScope_SYSTEM;
        } else {
            fatal("Bad scope type");
        }
    }
    return accessScope;
}

HSASegment
reqSegmentToHSASegment(Request* req)
{
    HSASegment accessSegment = HSASegment_GLOBAL;

    if (req->isGlobalSegment()) {
        accessSegment = HSASegment_GLOBAL;
    } else if (req->isGroupSegment()) {
        accessSegment = HSASegment_GROUP;
    } else if (req->isPrivateSegment()) {
        accessSegment = HSASegment_PRIVATE;
    } else if (req->isKernargSegment()) {
        accessSegment = HSASegment_KERNARG;
    } else if (req->isReadonlySegment()) {
        accessSegment = HSASegment_READONLY;
    } else if (req->isSpillSegment()) {
        accessSegment = HSASegment_SPILL;
    } else if (req->isArgSegment()) {
        accessSegment = HSASegment_ARG;
    } else {
        fatal("Bad segment type");
    }

    return accessSegment;
}

GPUCoalescer::GPUCoalescer(const Params *p)
    : RubyPort(p), issueEvent(this), deadlockCheckEvent(this)
{
    m_store_waiting_on_load_cycles = 0;
    m_store_waiting_on_store_cycles = 0;
    m_load_waiting_on_store_cycles = 0;
    m_load_waiting_on_load_cycles = 0;

    m_outstanding_count = 0;

    m_max_outstanding_requests = 0;
    m_deadlock_threshold = 0;
    m_instCache_ptr = nullptr;
    m_dataCache_ptr = nullptr;

    m_instCache_ptr = p->icache;
    m_dataCache_ptr = p->dcache;
    m_max_outstanding_requests = p->max_outstanding_requests;
    m_deadlock_threshold = p->deadlock_threshold;

    assert(m_max_outstanding_requests > 0);
    assert(m_deadlock_threshold > 0);
    assert(m_instCache_ptr);
    assert(m_dataCache_ptr);

    m_data_cache_hit_latency = p->dcache_hit_latency;

    m_usingNetworkTester = p->using_network_tester;
    assumingRfOCoherence = p->assume_rfo;
}

GPUCoalescer::~GPUCoalescer()
{
}
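// Deadlock watchdog: scan every outstanding read and write request and
// panic if any of them has been in flight for m_deadlock_threshold cycles
// or more. insertRequest() schedules the first check; wakeup() then
// reschedules itself one threshold later while requests remain outstanding.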
void
GPUCoalescer::wakeup()
{
    // Check for deadlock of any of the requests
    Cycles current_time = curCycle();

    // Check across all outstanding requests
    int total_outstanding = 0;

    RequestTable::iterator read = m_readRequestTable.begin();
    RequestTable::iterator read_end = m_readRequestTable.end();
    for (; read != read_end; ++read) {
        GPUCoalescerRequest* request = read->second;
        if (current_time - request->issue_time < m_deadlock_threshold)
            continue;

        panic("Possible Deadlock detected. Aborting!\n"
              "version: %d request.paddr: 0x%x m_readRequestTable: %d "
              "current time: %u issue_time: %d difference: %d\n", m_version,
              request->pkt->getAddr(), m_readRequestTable.size(),
              current_time * clockPeriod(), request->issue_time * clockPeriod(),
              (current_time - request->issue_time) * clockPeriod());
    }

    RequestTable::iterator write = m_writeRequestTable.begin();
    RequestTable::iterator write_end = m_writeRequestTable.end();
    for (; write != write_end; ++write) {
        GPUCoalescerRequest* request = write->second;
        if (current_time - request->issue_time < m_deadlock_threshold)
            continue;

        panic("Possible Deadlock detected. Aborting!\n"
              "version: %d request.paddr: 0x%x m_writeRequestTable: %d "
              "current time: %u issue_time: %d difference: %d\n", m_version,
              request->pkt->getAddr(), m_writeRequestTable.size(),
              current_time * clockPeriod(), request->issue_time * clockPeriod(),
              (current_time - request->issue_time) * clockPeriod());
    }

    total_outstanding += m_writeRequestTable.size();
    total_outstanding += m_readRequestTable.size();

    assert(m_outstanding_count == total_outstanding);

    if (m_outstanding_count > 0) {
        // If there are still outstanding requests, keep checking
        schedule(deadlockCheckEvent,
                 m_deadlock_threshold * clockPeriod() +
                 curTick());
    }
}

void
GPUCoalescer::resetStats()
{
    m_latencyHist.reset();
    m_missLatencyHist.reset();
    for (int i = 0; i < RubyRequestType_NUM; i++) {
        m_typeLatencyHist[i]->reset();
        m_missTypeLatencyHist[i]->reset();
        for (int j = 0; j < MachineType_NUM; j++) {
            m_missTypeMachLatencyHist[i][j]->reset();
        }
    }

    for (int i = 0; i < MachineType_NUM; i++) {
        m_missMachLatencyHist[i]->reset();

        m_IssueToInitialDelayHist[i]->reset();
        m_InitialToForwardDelayHist[i]->reset();
        m_ForwardToFirstResponseDelayHist[i]->reset();
        m_FirstResponseToCompletionDelayHist[i]->reset();
    }
}

void
GPUCoalescer::printProgress(ostream& out) const
{
}
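// Classify an incoming request against the current coalescer state:
// BufferFull if the mandatory queue has no free slot, Aliased if the
// controller has this line blocked or another request to the same line
// is still outstanding, Ready otherwise. The m_*_waiting_on_* counters
// record which kind of conflict forced the access to wait.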
RequestStatus
GPUCoalescer::getRequestStatus(PacketPtr pkt, RubyRequestType request_type)
{
    Addr line_addr = makeLineAddress(pkt->getAddr());

    if (!m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge())) {
        return RequestStatus_BufferFull;
    }

    if (m_controller->isBlocked(line_addr) &&
        request_type != RubyRequestType_Locked_RMW_Write) {
        return RequestStatus_Aliased;
    }

    if ((request_type == RubyRequestType_ST) ||
        (request_type == RubyRequestType_ATOMIC) ||
        (request_type == RubyRequestType_ATOMIC_RETURN) ||
        (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
        (request_type == RubyRequestType_RMW_Read) ||
        (request_type == RubyRequestType_RMW_Write) ||
        (request_type == RubyRequestType_Load_Linked) ||
        (request_type == RubyRequestType_Store_Conditional) ||
        (request_type == RubyRequestType_Locked_RMW_Read) ||
        (request_type == RubyRequestType_Locked_RMW_Write) ||
        (request_type == RubyRequestType_FLUSH)) {

        // Check if there is any outstanding read request for the same
        // cache line.
        if (m_readRequestTable.count(line_addr) > 0) {
            m_store_waiting_on_load_cycles++;
            return RequestStatus_Aliased;
        }

        if (m_writeRequestTable.count(line_addr) > 0) {
            // There is an outstanding write request for the cache line
            m_store_waiting_on_store_cycles++;
            return RequestStatus_Aliased;
        }
    } else {
        // Check if there is any outstanding write request for the same
        // cache line.
        if (m_writeRequestTable.count(line_addr) > 0) {
            m_load_waiting_on_store_cycles++;
            return RequestStatus_Aliased;
        }

        if (m_readRequestTable.count(line_addr) > 0) {
            // There is an outstanding read request for the cache line
            m_load_waiting_on_load_cycles++;
            return RequestStatus_Aliased;
        }
    }

    return RequestStatus_Ready;
}

// sets the kernelEndList
void
GPUCoalescer::insertKernel(int wavefront_id, PacketPtr pkt)
{
    // It is unclear whether a duplicate wavefront id can ever show up here,
    // but be careful: assert on it rather than letting a duplicate turn
    // into a simulator hang later.
    DPRINTF(GPUCoalescer, "inserting wf: %d to kernelEndlist\n", wavefront_id);
    assert(kernelEndList.count(wavefront_id) == 0);

    kernelEndList[wavefront_id] = pkt;
    DPRINTF(GPUCoalescer, "kernelEndList->size() = %d\n",
            kernelEndList.size());
}
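// The coalescer keeps one outstanding-request table for write-like
// accesses and one for read-like accesses, both keyed by line address,
// so each table allows at most one outstanding request per cache line:
// stores, atomics, RMWs, LL/SC, and flushes go to m_writeRequestTable,
// while loads and ifetches go to m_readRequestTable.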
// Insert the request on the correct request table.  Return true if
// the entry was already present.
bool
GPUCoalescer::insertRequest(PacketPtr pkt, RubyRequestType request_type)
{
    assert(getRequestStatus(pkt, request_type) == RequestStatus_Ready ||
           pkt->req->isLockedRMW() ||
           !m_mandatory_q_ptr->areNSlotsAvailable(1, clockEdge()));

    int total_outstanding M5_VAR_USED =
        m_writeRequestTable.size() + m_readRequestTable.size();

    assert(m_outstanding_count == total_outstanding);

    // See if we should schedule a deadlock check
    if (deadlockCheckEvent.scheduled() == false) {
        schedule(deadlockCheckEvent, m_deadlock_threshold + curTick());
    }

    Addr line_addr = makeLineAddress(pkt->getAddr());
    if ((request_type == RubyRequestType_ST) ||
        (request_type == RubyRequestType_ATOMIC) ||
        (request_type == RubyRequestType_ATOMIC_RETURN) ||
        (request_type == RubyRequestType_ATOMIC_NO_RETURN) ||
        (request_type == RubyRequestType_RMW_Read) ||
        (request_type == RubyRequestType_RMW_Write) ||
        (request_type == RubyRequestType_Load_Linked) ||
        (request_type == RubyRequestType_Store_Conditional) ||
        (request_type == RubyRequestType_Locked_RMW_Read) ||
        (request_type == RubyRequestType_Locked_RMW_Write) ||
        (request_type == RubyRequestType_FLUSH)) {

        pair<RequestTable::iterator, bool> r =
            m_writeRequestTable.insert(RequestTable::value_type(line_addr,
                (GPUCoalescerRequest*) NULL));
        if (r.second) {
            RequestTable::iterator i = r.first;
            i->second = new GPUCoalescerRequest(pkt, request_type,
                                                curCycle());
            DPRINTF(GPUCoalescer,
                    "Inserting write request for paddr %#x for type %d\n",
                    pkt->req->getPaddr(), i->second->m_type);
            m_outstanding_count++;
        } else {
            return true;
        }
    } else {
        pair<RequestTable::iterator, bool> r =
            m_readRequestTable.insert(RequestTable::value_type(line_addr,
                (GPUCoalescerRequest*) NULL));

        if (r.second) {
            RequestTable::iterator i = r.first;
            i->second = new GPUCoalescerRequest(pkt, request_type,
                                                curCycle());
            DPRINTF(GPUCoalescer,
                    "Inserting read request for paddr %#x for type %d\n",
                    pkt->req->getPaddr(), i->second->m_type);
            m_outstanding_count++;
        } else {
            return true;
        }
    }

    m_outstandReqHist.sample(m_outstanding_count);

    total_outstanding = m_writeRequestTable.size() + m_readRequestTable.size();
    assert(m_outstanding_count == total_outstanding);

    return false;
}

void
GPUCoalescer::markRemoved()
{
    m_outstanding_count--;
    assert(m_outstanding_count ==
           m_writeRequestTable.size() + m_readRequestTable.size());
}

void
GPUCoalescer::removeRequest(GPUCoalescerRequest* srequest)
{
    assert(m_outstanding_count ==
           m_writeRequestTable.size() + m_readRequestTable.size());

    Addr line_addr = makeLineAddress(srequest->pkt->getAddr());
    if ((srequest->m_type == RubyRequestType_ST) ||
        (srequest->m_type == RubyRequestType_RMW_Read) ||
        (srequest->m_type == RubyRequestType_RMW_Write) ||
        (srequest->m_type == RubyRequestType_Load_Linked) ||
        (srequest->m_type == RubyRequestType_Store_Conditional) ||
        (srequest->m_type == RubyRequestType_Locked_RMW_Read) ||
        (srequest->m_type == RubyRequestType_Locked_RMW_Write)) {
        m_writeRequestTable.erase(line_addr);
    } else {
        m_readRequestTable.erase(line_addr);
    }

    markRemoved();
}
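// The bool returned by handleLlsc is the "success" flag that writeCallback
// forwards to hitCallback; writeCallback only computes it when the network
// tester is not in use (see m_usingNetworkTester there).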
bool
GPUCoalescer::handleLlsc(Addr address, GPUCoalescerRequest* request)
{
    //
    // The success flag indicates whether the LLSC operation was successful.
    // LL ops will always succeed, but SC may fail if the cache line is no
    // longer locked.
    //
    bool success = true;
    if (request->m_type == RubyRequestType_Store_Conditional) {
        if (!m_dataCache_ptr->isLocked(address, m_version)) {
            //
            // For failed SC requests, indicate the failure to the cpu by
            // setting the extra data to zero.
            //
            request->pkt->req->setExtraData(0);
            success = false;
        } else {
            //
            // For successful SC requests, indicate the success to the cpu by
            // setting the extra data to one.
            //
            request->pkt->req->setExtraData(1);
        }
        //
        // Independent of success, all SC operations must clear the lock
        //
        m_dataCache_ptr->clearLocked(address);
    } else if (request->m_type == RubyRequestType_Load_Linked) {
        //
        // Note: To fully follow Alpha LLSC semantics, should the LL clear any
        // previously locked cache lines?
        //
        m_dataCache_ptr->setLocked(address, m_version);
    } else if ((m_dataCache_ptr->isTagPresent(address)) &&
               (m_dataCache_ptr->isLocked(address, m_version))) {
        //
        // Normal writes should clear the locked address
        //
        m_dataCache_ptr->clearLocked(address);
    }
    return success;
}

void
GPUCoalescer::writeCallback(Addr address, DataBlock& data)
{
    writeCallback(address, MachineType_NULL, data);
}

void
GPUCoalescer::writeCallback(Addr address,
                            MachineType mach,
                            DataBlock& data)
{
    writeCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
}

void
GPUCoalescer::writeCallback(Addr address,
                            MachineType mach,
                            DataBlock& data,
                            Cycles initialRequestTime,
                            Cycles forwardRequestTime,
                            Cycles firstResponseTime)
{
    writeCallback(address, mach, data,
                  initialRequestTime, forwardRequestTime, firstResponseTime,
                  false);
}

void
GPUCoalescer::writeCallback(Addr address,
                            MachineType mach,
                            DataBlock& data,
                            Cycles initialRequestTime,
                            Cycles forwardRequestTime,
                            Cycles firstResponseTime,
                            bool isRegion)
{
    assert(address == makeLineAddress(address));

    DPRINTF(GPUCoalescer, "write callback for address %#x\n", address);
    assert(m_writeRequestTable.count(makeLineAddress(address)));

    RequestTable::iterator i = m_writeRequestTable.find(address);
    assert(i != m_writeRequestTable.end());
    GPUCoalescerRequest* request = i->second;

    m_writeRequestTable.erase(i);
    markRemoved();

    assert((request->m_type == RubyRequestType_ST) ||
           (request->m_type == RubyRequestType_ATOMIC) ||
           (request->m_type == RubyRequestType_ATOMIC_RETURN) ||
           (request->m_type == RubyRequestType_ATOMIC_NO_RETURN) ||
           (request->m_type == RubyRequestType_RMW_Read) ||
           (request->m_type == RubyRequestType_RMW_Write) ||
           (request->m_type == RubyRequestType_Load_Linked) ||
           (request->m_type == RubyRequestType_Store_Conditional) ||
           (request->m_type == RubyRequestType_Locked_RMW_Read) ||
           (request->m_type == RubyRequestType_Locked_RMW_Write) ||
           (request->m_type == RubyRequestType_FLUSH));
    //
    // For Alpha, properly handle LL, SC, and write requests with respect to
    // locked cache blocks.
    //
    // Not valid for the Network_test protocol.
    //
    bool success = true;
    if (!m_usingNetworkTester)
        success = handleLlsc(address, request);

    if (request->m_type == RubyRequestType_Locked_RMW_Read) {
        m_controller->blockOnQueue(address, m_mandatory_q_ptr);
    } else if (request->m_type == RubyRequestType_Locked_RMW_Write) {
        m_controller->unblock(address);
    }

    hitCallback(request, mach, data, success,
                request->issue_time, forwardRequestTime, firstResponseTime,
                isRegion);
}

void
GPUCoalescer::readCallback(Addr address, DataBlock& data)
{
    readCallback(address, MachineType_NULL, data);
}

void
GPUCoalescer::readCallback(Addr address,
                           MachineType mach,
                           DataBlock& data)
{
    readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0));
}

void
GPUCoalescer::readCallback(Addr address,
                           MachineType mach,
                           DataBlock& data,
                           Cycles initialRequestTime,
                           Cycles forwardRequestTime,
                           Cycles firstResponseTime)
{
    readCallback(address, mach, data,
                 initialRequestTime, forwardRequestTime, firstResponseTime,
                 false);
}

void
GPUCoalescer::readCallback(Addr address,
                           MachineType mach,
                           DataBlock& data,
                           Cycles initialRequestTime,
                           Cycles forwardRequestTime,
                           Cycles firstResponseTime,
                           bool isRegion)
{
    assert(address == makeLineAddress(address));
    assert(m_readRequestTable.count(makeLineAddress(address)));

    DPRINTF(GPUCoalescer, "read callback for address %#x\n", address);
    RequestTable::iterator i = m_readRequestTable.find(address);
    assert(i != m_readRequestTable.end());
    GPUCoalescerRequest* request = i->second;

    m_readRequestTable.erase(i);
    markRemoved();

    assert((request->m_type == RubyRequestType_LD) ||
           (request->m_type == RubyRequestType_IFETCH));

    hitCallback(request, mach, data, true,
                request->issue_time, forwardRequestTime, firstResponseTime,
                isRegion);
}
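// hitCallback is the common completion path for reads and writes: it
// updates MRU state for the touched line, records latency statistics,
// and then walks every packet that was coalesced onto this line, copying
// bytes out of the returned DataBlock for load-like types and into it for
// store-like types, before handing the whole list to completeHitCallback.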
void
GPUCoalescer::hitCallback(GPUCoalescerRequest* srequest,
                          MachineType mach,
                          DataBlock& data,
                          bool success,
                          Cycles initialRequestTime,
                          Cycles forwardRequestTime,
                          Cycles firstResponseTime,
                          bool isRegion)
{
    PacketPtr pkt = srequest->pkt;
    Addr request_address = pkt->getAddr();
    Addr request_line_address = makeLineAddress(request_address);

    RubyRequestType type = srequest->m_type;

    // Set this cache entry to the most recently used
    if (type == RubyRequestType_IFETCH) {
        if (m_instCache_ptr->isTagPresent(request_line_address))
            m_instCache_ptr->setMRU(request_line_address);
    } else {
        if (m_dataCache_ptr->isTagPresent(request_line_address))
            m_dataCache_ptr->setMRU(request_line_address);
    }

    recordMissLatency(srequest, mach,
                      initialRequestTime,
                      forwardRequestTime,
                      firstResponseTime,
                      success, isRegion);

    // Update the data. This must be done for every request that was
    // coalesced onto this cache line.
    int len = reqCoalescer[request_line_address].size();
    std::vector<PacketPtr> mylist;
    for (int i = 0; i < len; ++i) {
        PacketPtr pkt = reqCoalescer[request_line_address][i].first;
        assert(type ==
               reqCoalescer[request_line_address][i].second[PrimaryType]);
        request_address = pkt->getAddr();
        request_line_address = makeLineAddress(pkt->getAddr());
        if (pkt->getPtr<uint8_t>()) {
            if ((type == RubyRequestType_LD) ||
                (type == RubyRequestType_ATOMIC) ||
                (type == RubyRequestType_ATOMIC_RETURN) ||
                (type == RubyRequestType_IFETCH) ||
                (type == RubyRequestType_RMW_Read) ||
                (type == RubyRequestType_Locked_RMW_Read) ||
                (type == RubyRequestType_Load_Linked)) {
                memcpy(pkt->getPtr<uint8_t>(),
                       data.getData(getOffset(request_address),
                                    pkt->getSize()),
                       pkt->getSize());
            } else {
                data.setData(pkt->getPtr<uint8_t>(),
                             getOffset(request_address), pkt->getSize());
            }
        } else {
            DPRINTF(MemoryAccess,
                    "WARNING.  Data not transferred from Ruby to M5 for type "
                    "%s\n",
                    RubyRequestType_to_string(type));
        }

        // If using the RubyTester, update the RubyTester sender state's
        // subBlock with the received data.  The tester will later access
        // this state.
        // Note: RubyPort will access its sender state before the
        // RubyTester.
        if (m_usingRubyTester) {
            RubyPort::SenderState *requestSenderState =
                safe_cast<RubyPort::SenderState*>(pkt->senderState);
            RubyTester::SenderState* testerSenderState =
                safe_cast<RubyTester::SenderState*>(requestSenderState->predecessor);
            testerSenderState->subBlock.mergeFrom(data);
        }

        mylist.push_back(pkt);
    }
    delete srequest;
    reqCoalescer.erase(request_line_address);
    assert(!reqCoalescer.count(request_line_address));

    completeHitCallback(mylist, len);
}

bool
GPUCoalescer::empty() const
{
    return m_writeRequestTable.empty() && m_readRequestTable.empty();
}
// Analyzes the packet to see if this request can be coalesced.
// If the request can be coalesced, it is added to the reqCoalescer table
// and makeRequest returns RequestStatus_Issued.
// If this is the first request to a cache line, the request is added to
// both the newRequests queue and the reqCoalescer table; makeRequest
// returns RequestStatus_Issued.
// If there is a pending request to this cache line and this request
// cannot be coalesced, RequestStatus_Aliased is returned and
// the packet needs to be reissued.
RequestStatus
GPUCoalescer::makeRequest(PacketPtr pkt)
{
    // Check for GPU Barrier Kernel End or Kernel Begin.
    // Leave these to be handled by the child class.
    // Kernel End/Barrier = isFlush + isRelease
    // Kernel Begin = isFlush + isAcquire
    if (pkt->req->isKernel()) {
        if (pkt->req->isAcquire()) {
            // This is a kernel begin; leave handling to the
            // virtual xCoalescer::makeRequest.
            return RequestStatus_Issued;
        } else if (pkt->req->isRelease()) {
            // This is a kernel end; leave handling to the
            // virtual xCoalescer::makeRequest.
            // If we are here, then a virtual version of this function
            // was not called, so we will also schedule the callback.
            int wf_id = 0;
            if (pkt->req->hasContextId()) {
                wf_id = pkt->req->contextId();
            }
            insertKernel(wf_id, pkt);
            newKernelEnds.push_back(wf_id);
            if (!issueEvent.scheduled()) {
                schedule(issueEvent, curTick());
            }
            return RequestStatus_Issued;
        }
    }

    // If the number of outstanding requests is greater than the max
    // allowed, return RequestStatus_BufferFull. This logic can be extended
    // to support proper backpressure.
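    // Note: a request rejected with BufferFull (or Aliased, below) is not
    // buffered anywhere in the coalescer, so the issuing core model is
    // presumably expected to hold on to the packet and retry it later.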
    if (m_outstanding_count >= m_max_outstanding_requests) {
        return RequestStatus_BufferFull;
    }

    RubyRequestType primary_type = RubyRequestType_NULL;
    RubyRequestType secondary_type = RubyRequestType_NULL;

    if (pkt->isLLSC()) {
        //
        // Alpha LL/SC instructions need to be handled carefully by the cache
        // coherence protocol to ensure they follow the proper semantics. In
        // particular, by identifying the operations as atomic, the protocol
        // should understand that migratory sharing optimizations should not
        // be performed (i.e. a load between the LL and SC should not steal
        // away exclusive permission).
        //
        if (pkt->isWrite()) {
            primary_type = RubyRequestType_Store_Conditional;
        } else {
            assert(pkt->isRead());
            primary_type = RubyRequestType_Load_Linked;
        }
        secondary_type = RubyRequestType_ATOMIC;
    } else if (pkt->req->isLockedRMW()) {
        //
        // x86 locked instructions are translated to store cache coherence
        // requests because these requests should always be treated as read
        // exclusive operations and should leverage any migratory sharing
        // optimization built into the protocol.
        //
        if (pkt->isWrite()) {
            primary_type = RubyRequestType_Locked_RMW_Write;
        } else {
            assert(pkt->isRead());
            primary_type = RubyRequestType_Locked_RMW_Read;
        }
        secondary_type = RubyRequestType_ST;
    } else if (pkt->isAtomicOp()) {
        //
        // GPU Atomic Operation
        //
        primary_type = RubyRequestType_ATOMIC;
        secondary_type = RubyRequestType_ATOMIC;
    } else {
        if (pkt->isRead()) {
            if (pkt->req->isInstFetch()) {
                primary_type = secondary_type = RubyRequestType_IFETCH;
            } else {
#if THE_ISA == X86_ISA
                uint32_t flags = pkt->req->getFlags();
                bool storeCheck = flags &
                    (TheISA::StoreCheck << TheISA::FlagShift);
#else
                bool storeCheck = false;
#endif // X86_ISA
                if (storeCheck) {
                    primary_type = RubyRequestType_RMW_Read;
                    secondary_type = RubyRequestType_ST;
                } else {
                    primary_type = secondary_type = RubyRequestType_LD;
                }
            }
        } else if (pkt->isWrite()) {
            //
            // Note: M5 packets do not differentiate ST from RMW_Write
            //
            primary_type = secondary_type = RubyRequestType_ST;
        } else if (pkt->isFlush()) {
            primary_type = secondary_type = RubyRequestType_FLUSH;
        } else if (pkt->req->isRelease() || pkt->req->isAcquire()) {
            if (assumingRfOCoherence) {
                // If we reached here, this request must be a memFence and
                // the protocol implements RfO, so the coalescer can assume
                // sequential consistency and schedule the callback
                // immediately.
                // Currently the code implements fence callbacks
                // by reusing the mechanism for kernel completions.
                // This should be fixed.
                int wf_id = 0;
                if (pkt->req->hasContextId()) {
                    wf_id = pkt->req->contextId();
                }
                insertKernel(wf_id, pkt);
                newKernelEnds.push_back(wf_id);
                if (!issueEvent.scheduled()) {
                    schedule(issueEvent, curTick());
                }
                return RequestStatus_Issued;
            } else {
                // If not RfO, return issued here and let the child coalescer
                // take care of it.
                return RequestStatus_Issued;
            }
        } else {
            panic("Unsupported ruby packet type\n");
        }
    }

    // Check if there is any pending request to this cache line from
    // previous cycles.
    // If there is a pending request, return aliased. Since coalescing
    // across time is not permitted, aliased requests are not coalesced.
    // If a request for this address has already been issued, we must block.
    RequestStatus status = getRequestStatus(pkt, primary_type);
    if (status != RequestStatus_Ready)
        return status;

    Addr line_addr = makeLineAddress(pkt->getAddr());
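    // Illustrative example (hypothetical addresses, assuming a 64-byte
    // Ruby block): four 16-byte loads to 0x1000, 0x1010, 0x1020, and
    // 0x1030 arriving in the same cycle all map to line 0x1000. The first
    // pushes 0x1000 onto newRequests; all four are appended to
    // reqCoalescer[0x1000], and issueRequest() later emits a single
    // RubyRequest for the whole line.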
    // Check if this request can be coalesced with previous
    // requests from this cycle.
    if (!reqCoalescer.count(line_addr)) {
        // This is the first access to this cache line.
        // A new request to the memory subsystem has to be
        // made in the next cycle for this cache line, so
        // add this line addr to the "newRequests" queue
        newRequests.push_back(line_addr);

    // There was a request to this cache line in this cycle;
    // let us see if we can coalesce this request with the previous
    // requests from this cycle.
    } else if (primary_type !=
               reqCoalescer[line_addr][0].second[PrimaryType]) {
        // can't coalesce loads, stores and atomics!
        return RequestStatus_Aliased;
    } else if (pkt->req->isLockedRMW() ||
               reqCoalescer[line_addr][0].first->req->isLockedRMW()) {
        // can't coalesce locked accesses, but can coalesce atomics!
        return RequestStatus_Aliased;
    } else if (pkt->req->hasContextId() && pkt->req->isRelease() &&
               pkt->req->contextId() !=
               reqCoalescer[line_addr][0].first->req->contextId()) {
        // can't coalesce releases from different wavefronts
        return RequestStatus_Aliased;
    }

    // in addition to the packet, we need to save both request types
    reqCoalescer[line_addr].push_back(
        RequestDesc(pkt, std::vector<RubyRequestType>()));
    reqCoalescer[line_addr].back().second.push_back(primary_type);
    reqCoalescer[line_addr].back().second.push_back(secondary_type);
    if (!issueEvent.scheduled())
        schedule(issueEvent, curTick());
    // TODO: issue hardware prefetches here
    return RequestStatus_Issued;
}

void
GPUCoalescer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
{
    int proc_id = -1;
    if (pkt != NULL && pkt->req->hasContextId()) {
        proc_id = pkt->req->contextId();
    }

    // If valid, copy the pc to the ruby request
    Addr pc = 0;
    if (pkt->req->hasPC()) {
        pc = pkt->req->getPC();
    }

    // At the moment, setting scopes only counts for GPU spill space
    // accesses (i.e., pkt->req->isStack()). This scope is REPLACE, since
    // it does not need to be flushed at the end of a kernel; private and
    // local may need to be visible at the end of the kernel.
    HSASegment accessSegment = reqSegmentToHSASegment(pkt->req);
    HSAScope accessScope = reqScopeToHSAScope(pkt->req);

    Addr line_addr = makeLineAddress(pkt->getAddr());
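    // The loop below merges every packet coalesced onto this line into one
    // request. Worked example (hypothetical offsets): two coalesced 4-byte
    // stores at line offsets 8 and 16 have their payloads copied into
    // dataBlock bytes 8..11 and 16..19 and the same accessMask bytes set;
    // an atomic instead contributes an (offset, AtomicOpFunctor*) pair to
    // atomicOps, with its bytes still marked in the mask.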
void
GPUCoalescer::issueRequest(PacketPtr pkt, RubyRequestType secondary_type)
{
    int proc_id = -1;
    if (pkt != NULL && pkt->req->hasContextId()) {
        proc_id = pkt->req->contextId();
    }

    // If valid, copy the pc to the ruby request
    Addr pc = 0;
    if (pkt->req->hasPC()) {
        pc = pkt->req->getPC();
    }

    // At the moment, setting scopes only counts for GPU spill space
    // accesses, i.e., pkt->req->isStack(). This scope is REPLACE since
    // spill data does not need to be flushed at the end of a kernel;
    // private and local data may need to be visible at the end of the
    // kernel.
    HSASegment accessSegment = reqSegmentToHSASegment(pkt->req);
    HSAScope accessScope = reqScopeToHSAScope(pkt->req);

    Addr line_addr = makeLineAddress(pkt->getAddr());

    // Create a WriteMask that records written bytes and atomic
    // operations. This enables partial writes and partial reads of
    // those writes.
    DataBlock dataBlock;
    dataBlock.clear();
    uint32_t blockSize = RubySystem::getBlockSizeBytes();
    std::vector<bool> accessMask(blockSize, false);
    std::vector<std::pair<int, AtomicOpFunctor*>> atomicOps;
    uint32_t tableSize = reqCoalescer[line_addr].size();
    for (int i = 0; i < tableSize; i++) {
        PacketPtr tmpPkt = reqCoalescer[line_addr][i].first;
        uint32_t tmpOffset = (tmpPkt->getAddr()) - line_addr;
        uint32_t tmpSize = tmpPkt->getSize();
        if (tmpPkt->isAtomicOp()) {
            std::pair<int, AtomicOpFunctor *> tmpAtomicOp(tmpOffset,
                                                    tmpPkt->getAtomicOp());
            atomicOps.push_back(tmpAtomicOp);
        } else if (tmpPkt->isWrite()) {
            dataBlock.setData(tmpPkt->getPtr<uint8_t>(),
                              tmpOffset, tmpSize);
        }
        for (int j = 0; j < tmpSize; j++) {
            accessMask[tmpOffset + j] = true;
        }
    }
    std::shared_ptr<RubyRequest> msg;
    if (pkt->isAtomicOp()) {
        msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
                              pkt->getPtr<uint8_t>(),
                              pkt->getSize(), pc, secondary_type,
                              RubyAccessMode_Supervisor, pkt,
                              PrefetchBit_No, proc_id, 100,
                              blockSize, accessMask,
                              dataBlock, atomicOps,
                              accessScope, accessSegment);
    } else {
        msg = std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
                              pkt->getPtr<uint8_t>(),
                              pkt->getSize(), pc, secondary_type,
                              RubyAccessMode_Supervisor, pkt,
                              PrefetchBit_No, proc_id, 100,
                              blockSize, accessMask,
                              dataBlock,
                              accessScope, accessSegment);
    }
    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %s\n",
             curTick(), m_version, "Coal", "Begin", "", "",
             printAddress(msg->getPhysicalAddress()),
             RubyRequestType_to_string(secondary_type));

    fatal_if(secondary_type == RubyRequestType_IFETCH,
             "there should not be any I-Fetch requests in the GPU Coalescer");
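
    // The enqueue below charges the configured data cache hit latency to
    // the request; the protocol requires at least one cycle here, which
    // the fatal_if guard enforces.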
    // Send the message to the cache controller
    fatal_if(m_data_cache_hit_latency == 0,
             "should not have a latency of zero");

    assert(m_mandatory_q_ptr);
    m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
}

template <class KEY, class VALUE>
std::ostream &
operator<<(ostream &out, const std::unordered_map<KEY, VALUE> &map)
{
    out << "[";
    for (auto i = map.begin(); i != map.end(); ++i)
        out << " " << i->first << "=" << i->second;
    out << " ]";

    return out;
}

void
GPUCoalescer::print(ostream& out) const
{
    out << "[GPUCoalescer: " << m_version
        << ", outstanding requests: " << m_outstanding_count
        << ", read request table: " << m_readRequestTable
        << ", write request table: " << m_writeRequestTable
        << "]";
}

// This can be called from setState whenever coherence permissions are
// upgraded. When invoked, coherence violations will be checked for the
// given block.
void
GPUCoalescer::checkCoherence(Addr addr)
{
#ifdef CHECK_COHERENCE
    m_ruby_system->checkGlobalCoherenceInvariant(addr);
#endif
}

void
GPUCoalescer::recordRequestType(SequencerRequestType requestType)
{
    DPRINTF(RubyStats, "Recorded statistic: %s\n",
            SequencerRequestType_to_string(requestType));
}

GPUCoalescer::IssueEvent::IssueEvent(GPUCoalescer* _seq)
    : Event(Progress_Event_Pri), seq(_seq)
{
}
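
// completeIssue() runs when the issue event fires, at most once per cycle.
// It walks the cache lines touched this cycle, issues a single coalesced
// request per line, and then completes any kernel-end releases queued by
// makeRequest().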
void
GPUCoalescer::completeIssue()
{
    // newRequests has the cacheline addresses of all the
    // requests which need to be issued to the memory subsystem
    // in this cycle
    int len = newRequests.size();
    DPRINTF(GPUCoalescer, "Completing issue for %d new requests.\n", len);
    for (int i = 0; i < len; ++i) {
        // Get the requests from the reqCoalescer table. Get only the
        // first request for each cacheline; the remaining requests
        // can be coalesced with the first one, so only one request
        // is issued per cacheline.
        RequestDesc info = reqCoalescer[newRequests[i]][0];
        PacketPtr pkt = info.first;
        DPRINTF(GPUCoalescer, "Completing for newReq %d: paddr %#x\n",
                i, pkt->req->getPaddr());
        // Insert this request into the read/writeRequestTables. These
        // tables are used to track aliased requests in the makeRequest
        // subroutine.
        bool found = insertRequest(pkt, info.second[PrimaryType]);

        if (found) {
            panic("GPUCoalescer::makeRequest should never be called if the "
                  "request is already outstanding\n");
        }

        // Issue request to ruby subsystem
        issueRequest(pkt, info.second[SecondaryType]);
    }
    newRequests.clear();

    // Issue any Kernel End releases queued this cycle.
    len = newKernelEnds.size();
    for (int i = 0; i < len; i++) {
        kernelCallback(newKernelEnds[i]);
    }
    newKernelEnds.clear();
}

void
GPUCoalescer::IssueEvent::process()
{
    seq->completeIssue();
}

const char *
GPUCoalescer::IssueEvent::description() const
{
    return "Issue coalesced request";
}

void
GPUCoalescer::evictionCallback(Addr address)
{
    ruby_eviction_callback(address);
}
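
// kernelCallback() completes a kernel-end release for the given wavefront:
// the packet that insertKernel() stashed in kernelEndList is handed back
// to the requestor through the ordinary hit-callback path.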
void
GPUCoalescer::kernelCallback(int wavefront_id)
{
    assert(kernelEndList.count(wavefront_id));

    ruby_hit_callback(kernelEndList[wavefront_id]);

    kernelEndList.erase(wavefront_id);
}

void
GPUCoalescer::atomicCallback(Addr address,
                             MachineType mach,
                             const DataBlock& data)
{
    assert(address == makeLineAddress(address));

    DPRINTF(GPUCoalescer, "atomic callback for address %#x\n", address);
    assert(m_writeRequestTable.count(makeLineAddress(address)));

    RequestTable::iterator i = m_writeRequestTable.find(address);
    assert(i != m_writeRequestTable.end());
    GPUCoalescerRequest* srequest = i->second;

    m_writeRequestTable.erase(i);
    markRemoved();

    assert((srequest->m_type == RubyRequestType_ATOMIC) ||
           (srequest->m_type == RubyRequestType_ATOMIC_RETURN) ||
           (srequest->m_type == RubyRequestType_ATOMIC_NO_RETURN));

    // Atomics don't write to cache, so there is no MRU update...

    recordMissLatency(srequest, mach,
                      srequest->issue_time, Cycles(0), Cycles(0), true, false);

    PacketPtr pkt = srequest->pkt;
    Addr request_address = pkt->getAddr();
    Addr request_line_address = makeLineAddress(pkt->getAddr());
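
    // Fan the line's data back out to every packet that was coalesced to
    // it. For atomics that return a value, the data block delivered here
    // holds the memory contents from before the atomic op was applied.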
    int len = reqCoalescer[request_line_address].size();
    std::vector<PacketPtr> mylist;
    for (int i = 0; i < len; ++i) {
        PacketPtr pkt = reqCoalescer[request_line_address][i].first;
        assert(srequest->m_type ==
               reqCoalescer[request_line_address][i].second[PrimaryType]);
        request_address = pkt->getAddr();
        request_line_address = makeLineAddress(request_address);
        if (pkt->getPtr<uint8_t>() &&
            srequest->m_type != RubyRequestType_ATOMIC_NO_RETURN) {
            // atomics are done in memory, and return the data
            // *before* the atomic op...
            memcpy(pkt->getPtr<uint8_t>(),
                   data.getData(getOffset(request_address),
                                pkt->getSize()),
                   pkt->getSize());
        } else {
            DPRINTF(MemoryAccess,
                    "WARNING.  Data not transferred from Ruby to M5 for type "
                    "%s\n",
                    RubyRequestType_to_string(srequest->m_type));
        }

        // If using the RubyTester, update the RubyTester sender state's
        // subBlock with the received data. The tester will later access
        // this state.
        // Note: RubyPort will access its sender state before the
        // RubyTester.
        if (m_usingRubyTester) {
            RubyPort::SenderState *requestSenderState =
                safe_cast<RubyPort::SenderState*>(pkt->senderState);
            RubyTester::SenderState* testerSenderState =
                safe_cast<RubyTester::SenderState*>(
                    requestSenderState->predecessor);
            testerSenderState->subBlock.mergeFrom(data);
        }

        mylist.push_back(pkt);
    }
    delete srequest;
    reqCoalescer.erase(request_line_address);
    assert(!reqCoalescer.count(request_line_address));

    completeHitCallback(mylist, len);
}

void
GPUCoalescer::recordCPReadCallBack(MachineID myMachID, MachineID senderMachID)
{
    if (myMachID == senderMachID) {
        CP_TCPLdHits++;
    } else if (machineIDToMachineType(senderMachID) == MachineType_TCP) {
        CP_TCPLdTransfers++;
    } else if (machineIDToMachineType(senderMachID) == MachineType_TCC) {
        CP_TCCLdHits++;
    } else {
        CP_LdMiss++;
    }
}

void
GPUCoalescer::recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID)
{
    if (myMachID == senderMachID) {
        CP_TCPStHits++;
    } else if (machineIDToMachineType(senderMachID) == MachineType_TCP) {
        CP_TCPStTransfers++;
    } else if (machineIDToMachineType(senderMachID) == MachineType_TCC) {
        CP_TCCStHits++;
    } else {
        CP_StMiss++;
    }
}
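
// completeHitCallback() hands each coalesced packet back to the slave port
// it arrived on, restoring the packet's original sender state, and then
// retries any requests the ports had to stall.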
void
GPUCoalescer::completeHitCallback(std::vector<PacketPtr> & mylist, int len)
{
    for (int i = 0; i < len; ++i) {
        RubyPort::SenderState *ss =
            safe_cast<RubyPort::SenderState *>(mylist[i]->senderState);
        MemSlavePort *port = ss->port;
        assert(port != NULL);

        mylist[i]->senderState = ss->predecessor;
        delete ss;
        port->hitCallback(mylist[i]);
        trySendRetries();
    }

    testDrainComplete();
}

PacketPtr
GPUCoalescer::mapAddrToPkt(Addr address)
{
    RequestTable::iterator i = m_readRequestTable.find(address);
    assert(i != m_readRequestTable.end());
    GPUCoalescerRequest* request = i->second;
    return request->pkt;
}
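
// recordMissLatency() classifies a completed request by the machine that
// supplied the data (a TCP hit, a TCP-to-TCP transfer, a TCC hit, or a
// miss beyond the GPU caches) and samples the total, per-type, and
// per-machine latency histograms.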
void
GPUCoalescer::recordMissLatency(GPUCoalescerRequest* srequest,
                                MachineType mach,
                                Cycles initialRequestTime,
                                Cycles forwardRequestTime,
                                Cycles firstResponseTime,
                                bool success, bool isRegion)
{
    RubyRequestType type = srequest->m_type;
    Cycles issued_time = srequest->issue_time;
    Cycles completion_time = curCycle();
    assert(completion_time >= issued_time);
    Cycles total_lat = completion_time - issued_time;

    // cache stats (valid for RfO protocol only)
    if (mach == MachineType_TCP) {
        if (type == RubyRequestType_LD) {
            GPU_TCPLdHits++;
        } else {
            GPU_TCPStHits++;
        }
    } else if (mach == MachineType_L1Cache_wCC) {
        if (type == RubyRequestType_LD) {
            GPU_TCPLdTransfers++;
        } else {
            GPU_TCPStTransfers++;
        }
    } else if (mach == MachineType_TCC) {
        if (type == RubyRequestType_LD) {
            GPU_TCCLdHits++;
        } else {
            GPU_TCCStHits++;
        }
    } else {
        if (type == RubyRequestType_LD) {
            GPU_LdMiss++;
        } else {
            GPU_StMiss++;
        }
    }

    // Profile all access latency, even zero latency accesses
    m_latencyHist.sample(total_lat);
    m_typeLatencyHist[type]->sample(total_lat);

    // Profile the miss latency for all non-zero demand misses
    if (total_lat != Cycles(0)) {
        m_missLatencyHist.sample(total_lat);
        m_missTypeLatencyHist[type]->sample(total_lat);

        if (mach != MachineType_NUM) {
            m_missMachLatencyHist[mach]->sample(total_lat);
            m_missTypeMachLatencyHist[type][mach]->sample(total_lat);

            if ((issued_time <= initialRequestTime) &&
                (initialRequestTime <= forwardRequestTime) &&
                (forwardRequestTime <= firstResponseTime) &&
                (firstResponseTime <= completion_time)) {

                m_IssueToInitialDelayHist[mach]->sample(
                    initialRequestTime - issued_time);
                m_InitialToForwardDelayHist[mach]->sample(
                    forwardRequestTime - initialRequestTime);
                m_ForwardToFirstResponseDelayHist[mach]->sample(
                    firstResponseTime - forwardRequestTime);
                m_FirstResponseToCompletionDelayHist[mach]->sample(
                    completion_time - firstResponseTime);
            }
        }
    }

    DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %s %d cycles\n",
             curTick(), m_version, "Coal",
             success ? "Done" : "SC_Failed", "", "",
             printAddress(srequest->pkt->getAddr()), total_lat);
}
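
// regStats() is the standard gem5 hook for registering statistics. The
// histogram vectors below are sized by the RubyRequestType and MachineType
// enums, with ten buckets per histogram.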
void
GPUCoalescer::regStats()
{
    // These statistical variables are not for display.
    // The profiler will collate these across different
    // coalescers and display those collated statistics.
    m_outstandReqHist.init(10);
    m_latencyHist.init(10);
    m_missLatencyHist.init(10);

    for (int i = 0; i < RubyRequestType_NUM; i++) {
        m_typeLatencyHist.push_back(new Stats::Histogram());
        m_typeLatencyHist[i]->init(10);

        m_missTypeLatencyHist.push_back(new Stats::Histogram());
        m_missTypeLatencyHist[i]->init(10);
    }

    for (int i = 0; i < MachineType_NUM; i++) {
        m_missMachLatencyHist.push_back(new Stats::Histogram());
        m_missMachLatencyHist[i]->init(10);

        m_IssueToInitialDelayHist.push_back(new Stats::Histogram());
        m_IssueToInitialDelayHist[i]->init(10);

        m_InitialToForwardDelayHist.push_back(new Stats::Histogram());
        m_InitialToForwardDelayHist[i]->init(10);

        m_ForwardToFirstResponseDelayHist.push_back(new Stats::Histogram());
        m_ForwardToFirstResponseDelayHist[i]->init(10);

        m_FirstResponseToCompletionDelayHist.push_back(new Stats::Histogram());
        m_FirstResponseToCompletionDelayHist[i]->init(10);
    }

    for (int i = 0; i < RubyRequestType_NUM; i++) {
        m_missTypeMachLatencyHist.push_back(std::vector<Stats::Histogram *>());

        for (int j = 0; j < MachineType_NUM; j++) {
            m_missTypeMachLatencyHist[i].push_back(new Stats::Histogram());
            m_missTypeMachLatencyHist[i][j]->init(10);
        }
    }
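
    // The scalar hit/transfer/miss counters below are split between GPU
    // (compute) and CP (command processor) requestors; each shows up in
    // the stats output under this coalescer's name, e.g.
    // <name>.gpu_tcp_ld_hits.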
    // GPU cache stats
    GPU_TCPLdHits
        .name(name() + ".gpu_tcp_ld_hits")
        .desc("loads that hit in the TCP")
        ;
    GPU_TCPLdTransfers
        .name(name() + ".gpu_tcp_ld_transfers")
        .desc("TCP to TCP load transfers")
        ;
    GPU_TCCLdHits
        .name(name() + ".gpu_tcc_ld_hits")
        .desc("loads that hit in the TCC")
        ;
    GPU_LdMiss
        .name(name() + ".gpu_ld_misses")
        .desc("loads that miss in the GPU")
        ;

    GPU_TCPStHits
        .name(name() + ".gpu_tcp_st_hits")
        .desc("stores that hit in the TCP")
        ;
    GPU_TCPStTransfers
        .name(name() + ".gpu_tcp_st_transfers")
        .desc("TCP to TCP store transfers")
        ;
    GPU_TCCStHits
        .name(name() + ".gpu_tcc_st_hits")
        .desc("stores that hit in the TCC")
        ;
    GPU_StMiss
        .name(name() + ".gpu_st_misses")
        .desc("stores that miss in the GPU")
        ;

    // CP cache stats
    CP_TCPLdHits
        .name(name() + ".cp_tcp_ld_hits")
        .desc("loads that hit in the TCP")
        ;
    CP_TCPLdTransfers
        .name(name() + ".cp_tcp_ld_transfers")
        .desc("TCP to TCP load transfers")
        ;
    CP_TCCLdHits
        .name(name() + ".cp_tcc_ld_hits")
        .desc("loads that hit in the TCC")
        ;
    CP_LdMiss
        .name(name() + ".cp_ld_misses")
        .desc("loads that miss in the GPU")
        ;

    CP_TCPStHits
        .name(name() + ".cp_tcp_st_hits")
        .desc("stores that hit in the TCP")
        ;
    CP_TCPStTransfers
        .name(name() + ".cp_tcp_st_transfers")
        .desc("TCP to TCP store transfers")
        ;
    CP_TCCStHits
        .name(name() + ".cp_tcc_st_hits")
        .desc("stores that hit in the TCC")
        ;
    CP_StMiss
        .name(name() + ".cp_st_misses")
        .desc("stores that miss in the GPU")
        ;
}