GPUCoalescer.hh revision 12697
19243SN/A/* 210206Sandreas.hansson@arm.com * Copyright (c) 2013-2015 Advanced Micro Devices, Inc. 39243SN/A * All rights reserved. 49243SN/A * 59243SN/A * For use for simulation and test purposes only 69243SN/A * 79243SN/A * Redistribution and use in source and binary forms, with or without 89243SN/A * modification, are permitted provided that the following conditions are met: 99243SN/A * 109243SN/A * 1. Redistributions of source code must retain the above copyright notice, 119243SN/A * this list of conditions and the following disclaimer. 129243SN/A * 139243SN/A * 2. Redistributions in binary form must reproduce the above copyright notice, 149831SN/A * this list of conditions and the following disclaimer in the documentation 159831SN/A * and/or other materials provided with the distribution. 169831SN/A * 179243SN/A * 3. Neither the name of the copyright holder nor the names of its 189243SN/A * contributors may be used to endorse or promote products derived from this 199243SN/A * software without specific prior written permission. 209243SN/A * 219243SN/A * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 229243SN/A * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 239243SN/A * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 249243SN/A * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 259243SN/A * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 269243SN/A * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 279243SN/A * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 289243SN/A * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 299243SN/A * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 309243SN/A * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 319243SN/A * POSSIBILITY OF SUCH DAMAGE. 329243SN/A * 339243SN/A * Authors: Sooraj Puthoor 349243SN/A */ 359243SN/A 369243SN/A#ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__ 379243SN/A#define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__ 389243SN/A 399243SN/A#include <iostream> 409243SN/A#include <unordered_map> 419243SN/A 429967SN/A#include "base/statistics.hh" 439243SN/A#include "mem/protocol/HSAScope.hh" 449243SN/A#include "mem/protocol/HSASegment.hh" 459243SN/A#include "mem/protocol/PrefetchBit.hh" 469243SN/A#include "mem/protocol/RubyAccessMode.hh" 4710146Sandreas.hansson@arm.com#include "mem/protocol/RubyRequestType.hh" 489243SN/A#include "mem/protocol/SequencerRequestType.hh" 499243SN/A#include "mem/request.hh" 5010146Sandreas.hansson@arm.com#include "mem/ruby/common/Address.hh" 5110146Sandreas.hansson@arm.com#include "mem/ruby/common/Consumer.hh" 529243SN/A#include "mem/ruby/system/Sequencer.hh" 539488SN/A 549488SN/Aclass DataBlock; 559243SN/Aclass CacheMsg; 569243SN/Aclass MachineID; 579243SN/Aclass CacheMemory; 589243SN/A 599243SN/Aclass RubyGPUCoalescerParams; 609243SN/A 6110146Sandreas.hansson@arm.comHSAScope reqScopeToHSAScope(Request* req); 629243SN/AHSASegment reqSegmentToHSASegment(Request* req); 639243SN/A 649243SN/Astruct GPUCoalescerRequest 6510287Sandreas.hansson@arm.com{ 6610287Sandreas.hansson@arm.com PacketPtr pkt; 6710287Sandreas.hansson@arm.com RubyRequestType m_type; 6810287Sandreas.hansson@arm.com Cycles issue_time; 6910287Sandreas.hansson@arm.com 709243SN/A GPUCoalescerRequest(PacketPtr _pkt, RubyRequestType _m_type, 7110287Sandreas.hansson@arm.com Cycles _issue_time) 7210287Sandreas.hansson@arm.com : pkt(_pkt), m_type(_m_type), issue_time(_issue_time) 7310287Sandreas.hansson@arm.com {} 7410287Sandreas.hansson@arm.com}; 7510287Sandreas.hansson@arm.com 7610287Sandreas.hansson@arm.comclass RequestDesc 7710287Sandreas.hansson@arm.com{ 7810287Sandreas.hansson@arm.com public: 7910287Sandreas.hansson@arm.com RequestDesc(PacketPtr pkt, RubyRequestType p_type, RubyRequestType s_type) 8010287Sandreas.hansson@arm.com : pkt(pkt), primaryType(p_type), secondaryType(s_type) 8110287Sandreas.hansson@arm.com { 8210287Sandreas.hansson@arm.com } 8310287Sandreas.hansson@arm.com 849243SN/A RequestDesc() : pkt(nullptr), primaryType(RubyRequestType_NULL), 8510146Sandreas.hansson@arm.com secondaryType(RubyRequestType_NULL) 869243SN/A { 879243SN/A } 889243SN/A 899243SN/A PacketPtr pkt; 909243SN/A RubyRequestType primaryType; 919243SN/A RubyRequestType secondaryType; 929243SN/A}; 939243SN/A 949243SN/Astd::ostream& operator<<(std::ostream& out, const GPUCoalescerRequest& obj); 959243SN/A 9610146Sandreas.hansson@arm.comclass GPUCoalescer : public RubyPort 979243SN/A{ 989243SN/A public: 999243SN/A typedef RubyGPUCoalescerParams Params; 10010146Sandreas.hansson@arm.com GPUCoalescer(const Params *); 1019243SN/A ~GPUCoalescer(); 1029243SN/A 1039243SN/A // Public Methods 1049243SN/A void wakeup(); // Used only for deadlock detection 1059243SN/A 1069243SN/A void printProgress(std::ostream& out) const; 1079243SN/A void resetStats(); 1089243SN/A void collateStats(); 1099243SN/A void regStats(); 1109243SN/A 1119243SN/A void writeCallback(Addr address, DataBlock& data); 1129243SN/A 1139243SN/A void writeCallback(Addr address, 1149243SN/A MachineType mach, 1159243SN/A DataBlock& data); 1169243SN/A 1179243SN/A void writeCallback(Addr address, 1189243SN/A MachineType mach, 1199243SN/A DataBlock& data, 1209243SN/A Cycles initialRequestTime, 1219243SN/A Cycles forwardRequestTime, 1229243SN/A Cycles firstResponseTime, 1239243SN/A bool isRegion); 1249243SN/A 1259243SN/A void writeCallback(Addr address, 1269243SN/A MachineType mach, 12710206Sandreas.hansson@arm.com DataBlock& data, 12810206Sandreas.hansson@arm.com Cycles initialRequestTime, 1299243SN/A Cycles forwardRequestTime, 13010206Sandreas.hansson@arm.com Cycles firstResponseTime); 13110206Sandreas.hansson@arm.com 13210206Sandreas.hansson@arm.com void readCallback(Addr address, DataBlock& data); 13310206Sandreas.hansson@arm.com 13410206Sandreas.hansson@arm.com void readCallback(Addr address, 13510206Sandreas.hansson@arm.com MachineType mach, 13610206Sandreas.hansson@arm.com DataBlock& data); 13710206Sandreas.hansson@arm.com 1389243SN/A void readCallback(Addr address, 1399488SN/A MachineType mach, 1409969SN/A DataBlock& data, 1419488SN/A Cycles initialRequestTime, 1429243SN/A Cycles forwardRequestTime, 14310210Sandreas.hansson@arm.com Cycles firstResponseTime); 14410210Sandreas.hansson@arm.com 14510211Sandreas.hansson@arm.com void readCallback(Addr address, 14610211Sandreas.hansson@arm.com MachineType mach, 14710210Sandreas.hansson@arm.com DataBlock& data, 14810210Sandreas.hansson@arm.com Cycles initialRequestTime, 14910210Sandreas.hansson@arm.com Cycles forwardRequestTime, 1509243SN/A Cycles firstResponseTime, 1519243SN/A bool isRegion); 1529243SN/A /* atomics need their own callback because the data 1539243SN/A might be const coming from SLICC */ 1549243SN/A void atomicCallback(Addr address, 1559243SN/A MachineType mach, 15610207Sandreas.hansson@arm.com const DataBlock& data); 1579243SN/A 1589243SN/A void recordCPReadCallBack(MachineID myMachID, MachineID senderMachID); 15910246Sandreas.hansson@arm.com void recordCPWriteCallBack(MachineID myMachID, MachineID senderMachID); 16010246Sandreas.hansson@arm.com 16110394Swendy.elsasser@arm.com // Alternate implementations in VIPER Coalescer 1629243SN/A virtual RequestStatus makeRequest(PacketPtr pkt); 16310211Sandreas.hansson@arm.com 16410210Sandreas.hansson@arm.com int outstandingCount() const { return m_outstanding_count; } 1659969SN/A 1669243SN/A bool 16710141SN/A isDeadlockEventScheduled() const 1689727SN/A { 1699727SN/A return deadlockCheckEvent.scheduled(); 1709727SN/A } 17110394Swendy.elsasser@arm.com 17210246Sandreas.hansson@arm.com void 17310141SN/A descheduleDeadlockEvent() 1749243SN/A { 1759243SN/A deschedule(deadlockCheckEvent); 1769243SN/A } 1779243SN/A 1789831SN/A bool empty() const; 1799831SN/A 1809831SN/A void print(std::ostream& out) const; 1819831SN/A void checkCoherence(Addr address); 1829831SN/A 1839831SN/A void markRemoved(); 1849831SN/A void removeRequest(GPUCoalescerRequest* request); 1859831SN/A void evictionCallback(Addr address); 1869831SN/A void completeIssue(); 1879831SN/A 1889831SN/A void insertKernel(int wavefront_id, PacketPtr pkt); 1899831SN/A 1909831SN/A void recordRequestType(SequencerRequestType requestType); 1919831SN/A Stats::Histogram& getOutstandReqHist() { return m_outstandReqHist; } 1929831SN/A 1939831SN/A Stats::Histogram& getLatencyHist() { return m_latencyHist; } 1949831SN/A Stats::Histogram& getTypeLatencyHist(uint32_t t) 1959831SN/A { return *m_typeLatencyHist[t]; } 1969831SN/A 1979831SN/A Stats::Histogram& getMissLatencyHist() 1989831SN/A { return m_missLatencyHist; } 1999243SN/A Stats::Histogram& getMissTypeLatencyHist(uint32_t t) 2009243SN/A { return *m_missTypeLatencyHist[t]; } 2019243SN/A 2029243SN/A Stats::Histogram& getMissMachLatencyHist(uint32_t t) const 2039243SN/A { return *m_missMachLatencyHist[t]; } 2049243SN/A 2059243SN/A Stats::Histogram& 2069243SN/A getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const 2079243SN/A { return *m_missTypeMachLatencyHist[r][t]; } 2089243SN/A 2099243SN/A Stats::Histogram& getIssueToInitialDelayHist(uint32_t t) const 2109243SN/A { return *m_IssueToInitialDelayHist[t]; } 2119243SN/A 2129243SN/A Stats::Histogram& 2139243SN/A getInitialToForwardDelayHist(const MachineType t) const 2149243SN/A { return *m_InitialToForwardDelayHist[t]; } 2159966SN/A 2169966SN/A Stats::Histogram& 2179243SN/A getForwardRequestToFirstResponseHist(const MachineType t) const 2189243SN/A { return *m_ForwardToFirstResponseDelayHist[t]; } 2199967SN/A 22010245Sandreas.hansson@arm.com Stats::Histogram& 2219831SN/A getFirstResponseToCompletionDelayHist(const MachineType t) const 2229831SN/A { return *m_FirstResponseToCompletionDelayHist[t]; } 2239967SN/A 2249967SN/A // Changed to protected to enable inheritance by VIPER Coalescer 2259967SN/A protected: 2269967SN/A bool tryCacheAccess(Addr addr, RubyRequestType type, 2279967SN/A Addr pc, RubyAccessMode access_mode, 2289967SN/A int size, DataBlock*& data_ptr); 2299967SN/A // Alternate implementations in VIPER Coalescer 2309831SN/A virtual void issueRequest(PacketPtr pkt, RubyRequestType type); 2319831SN/A 2329831SN/A void kernelCallback(int wavfront_id); 2339831SN/A 2349831SN/A void hitCallback(GPUCoalescerRequest* request, 2359832SN/A MachineType mach, 2369831SN/A DataBlock& data, 2379831SN/A bool success, 2389831SN/A Cycles initialRequestTime, 2399831SN/A Cycles forwardRequestTime, 2409831SN/A Cycles firstResponseTime, 2419832SN/A bool isRegion); 2429831SN/A void recordMissLatency(GPUCoalescerRequest* request, 2439831SN/A MachineType mach, 2449831SN/A Cycles initialRequestTime, 2459831SN/A Cycles forwardRequestTime, 2469831SN/A Cycles firstResponseTime, 2479831SN/A bool success, bool isRegion); 2489967SN/A void completeHitCallback(std::vector<PacketPtr> & mylist, int len); 2499243SN/A PacketPtr mapAddrToPkt(Addr address); 2509967SN/A 25110245Sandreas.hansson@arm.com 2529967SN/A RequestStatus getRequestStatus(PacketPtr pkt, 2539243SN/A RubyRequestType request_type); 2549967SN/A bool insertRequest(PacketPtr pkt, RubyRequestType request_type); 2559967SN/A 2569967SN/A bool handleLlsc(Addr address, GPUCoalescerRequest* request); 2579243SN/A 2589243SN/A EventFunctionWrapper issueEvent; 2599243SN/A 2609243SN/A 2619243SN/A // Changed to protected to enable inheritance by VIPER Coalescer 2629243SN/A protected: 26310206Sandreas.hansson@arm.com int m_max_outstanding_requests; 26410206Sandreas.hansson@arm.com int m_deadlock_threshold; 2659243SN/A 2669243SN/A CacheMemory* m_dataCache_ptr; 26710208Sandreas.hansson@arm.com CacheMemory* m_instCache_ptr; 26810208Sandreas.hansson@arm.com 26910208Sandreas.hansson@arm.com // The cache access latency for this GPU data cache. This is assessed at the 2709243SN/A // beginning of each access. This should be very similar to the 27110146Sandreas.hansson@arm.com // implementation in Sequencer() as this is very much like a Sequencer 2729243SN/A Cycles m_data_cache_hit_latency; 27310208Sandreas.hansson@arm.com 27410208Sandreas.hansson@arm.com // We need to track both the primary and secondary request types. 27510208Sandreas.hansson@arm.com // The secondary request type comprises a subset of RubyRequestTypes that 27610208Sandreas.hansson@arm.com // are understood by the L1 Controller. A primary request type can be any 27710208Sandreas.hansson@arm.com // RubyRequestType. 27810208Sandreas.hansson@arm.com typedef std::unordered_map<Addr, std::vector<RequestDesc>> CoalescingTable; 2799243SN/A CoalescingTable reqCoalescer; 28010146Sandreas.hansson@arm.com std::vector<Addr> newRequests; 2819243SN/A 28210208Sandreas.hansson@arm.com typedef std::unordered_map<Addr, GPUCoalescerRequest*> RequestTable; 28310208Sandreas.hansson@arm.com RequestTable m_writeRequestTable; 2849243SN/A RequestTable m_readRequestTable; 2859243SN/A // Global outstanding request count, across all request tables 2869243SN/A int m_outstanding_count; 2879243SN/A bool m_deadlock_check_scheduled; 2889831SN/A std::unordered_map<int, PacketPtr> kernelEndList; 2899243SN/A std::vector<int> newKernelEnds; 2909243SN/A 2919831SN/A int m_store_waiting_on_load_cycles; 2929243SN/A int m_store_waiting_on_store_cycles; 2939243SN/A int m_load_waiting_on_store_cycles; 2949243SN/A int m_load_waiting_on_load_cycles; 2959243SN/A 2969831SN/A bool m_runningGarnetStandalone; 2979243SN/A 2989243SN/A EventFunctionWrapper deadlockCheckEvent; 2999831SN/A bool assumingRfOCoherence; 3009243SN/A 3019243SN/A // m5 style stats for TCP hit/miss counts 3029243SN/A Stats::Scalar GPU_TCPLdHits; 3039243SN/A Stats::Scalar GPU_TCPLdTransfers; 3049831SN/A Stats::Scalar GPU_TCCLdHits; 3059831SN/A Stats::Scalar GPU_LdMiss; 3069831SN/A 3079243SN/A Stats::Scalar GPU_TCPStHits; 3089243SN/A Stats::Scalar GPU_TCPStTransfers; 3099243SN/A Stats::Scalar GPU_TCCStHits; 3109243SN/A Stats::Scalar GPU_StMiss; 3119831SN/A 3129831SN/A Stats::Scalar CP_TCPLdHits; 3139831SN/A Stats::Scalar CP_TCPLdTransfers; 3149243SN/A Stats::Scalar CP_TCCLdHits; 3159831SN/A Stats::Scalar CP_LdMiss; 3169243SN/A 3179243SN/A Stats::Scalar CP_TCPStHits; 3189243SN/A Stats::Scalar CP_TCPStTransfers; 3199243SN/A Stats::Scalar CP_TCCStHits; 3209243SN/A Stats::Scalar CP_StMiss; 3219243SN/A 3229243SN/A //! Histogram for number of outstanding requests per cycle. 3239243SN/A Stats::Histogram m_outstandReqHist; 3249831SN/A 3259831SN/A //! Histogram for holding latency profile of all requests. 3269831SN/A Stats::Histogram m_latencyHist; 3279243SN/A std::vector<Stats::Histogram *> m_typeLatencyHist; 3289831SN/A 3299243SN/A //! Histogram for holding latency profile of all requests that 3309243SN/A //! miss in the controller connected to this sequencer. 3319243SN/A Stats::Histogram m_missLatencyHist; 3329243SN/A std::vector<Stats::Histogram *> m_missTypeLatencyHist; 3339243SN/A 3349243SN/A //! Histograms for profiling the latencies for requests that 3359243SN/A //! required external messages. 3369243SN/A std::vector<Stats::Histogram *> m_missMachLatencyHist; 3379243SN/A std::vector< std::vector<Stats::Histogram *> > m_missTypeMachLatencyHist; 3389243SN/A 3399243SN/A //! Histograms for recording the breakdown of miss latency 3409243SN/A std::vector<Stats::Histogram *> m_IssueToInitialDelayHist; 3419243SN/A std::vector<Stats::Histogram *> m_InitialToForwardDelayHist; 3429243SN/A std::vector<Stats::Histogram *> m_ForwardToFirstResponseDelayHist; 3439243SN/A std::vector<Stats::Histogram *> m_FirstResponseToCompletionDelayHist; 3449243SN/A 3459243SN/Aprivate: 3469243SN/A // Private copy constructor and assignment operator 3479243SN/A GPUCoalescer(const GPUCoalescer& obj); 3489243SN/A GPUCoalescer& operator=(const GPUCoalescer& obj); 3499726SN/A}; 3509243SN/A 3519726SN/Ainline std::ostream& 3529243SN/Aoperator<<(std::ostream& out, const GPUCoalescer& obj) 3539243SN/A{ 3549243SN/A obj.print(out); 3559831SN/A out << std::flush; 3569831SN/A return out; 3579831SN/A} 3589243SN/A 3599243SN/A#endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__ 3609831SN/A