// VIPERCoalescer.cc revision 11321:02e930db812d
1/*
2 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Author: Sooraj Puthoor
34 */
35
36#include "base/misc.hh"
37#include "base/str.hh"
38#include "config/the_isa.hh"
39
40#if THE_ISA == X86_ISA
41#include "arch/x86/insts/microldstop.hh"
42
43#endif // X86_ISA
44#include "mem/ruby/system/VIPERCoalescer.hh"
45
46#include "cpu/testers/rubytest/RubyTester.hh"
47#include "debug/GPUCoalescer.hh"
48#include "debug/MemoryAccess.hh"
49#include "mem/packet.hh"
50#include "mem/ruby/common/SubBlock.hh"
51#include "mem/ruby/network/MessageBuffer.hh"
52#include "mem/ruby/profiler/Profiler.hh"
53#include "mem/ruby/slicc_interface/AbstractController.hh"
54#include "mem/ruby/slicc_interface/RubyRequest.hh"
55#include "mem/ruby/structures/CacheMemory.hh"
56#include "mem/ruby/system/GPUCoalescer.hh"
57#include "mem/ruby/system/RubySystem.hh"
58#include "params/VIPERCoalescer.hh"
59
60using namespace std;
61
62VIPERCoalescer *
63VIPERCoalescerParams::create()
64{
65    return new VIPERCoalescer(this);
66}
67
68VIPERCoalescer::VIPERCoalescer(const Params *p)
69    : GPUCoalescer(p)
70{
71    m_max_wb_per_cycle=p->max_wb_per_cycle;
72    m_max_inv_per_cycle=p->max_inv_per_cycle;
73    m_outstanding_inv = 0;
74    m_outstanding_wb = 0;
75}
76
VIPERCoalescer::~VIPERCoalescer()
{
    // Nothing to release here: this class only adds plain counters and
    // limits on top of GPUCoalescer, which handles its own teardown.
}
80
// Analyzes the packet to see if this request can be coalesced.
// If request can be coalesced, this request is added to the reqCoalescer table
// and makeRequest returns RequestStatus_Issued;
// If this is the first request to a cacheline, request is added to both
// newRequests queue and to the reqCoalescer table; makeRequest
// returns RequestStatus_Issued.
// If there is a pending request to this cacheline and this request
// can't be coalesced, RequestStatus_Aliased is returned and
// the packet needs to be reissued.
RequestStatus
VIPERCoalescer::makeRequest(PacketPtr pkt)
{
    if (m_outstanding_wb | m_outstanding_inv) {
        DPRINTF(GPUCoalescer,
                "There are %d Writebacks and %d Invalidatons\n",
                m_outstanding_wb, m_outstanding_inv);
    }
    // Are we in the middle of a release (an L1 writeback walk is draining)?
    if ((m_outstanding_wb) > 0) {
        if (pkt->req->isKernel()) {
            // Everything is fine:
            // barriers and kernel-end requests can coalesce.
            // If it is a kernel begin, flush (invalidate) the cache —
            // but only if no invalidation walk is already in progress.
            if (pkt->req->isAcquire() && (m_outstanding_inv == 0)) {
                invL1();
            }

            if (pkt->req->isRelease()) {
                insertKernel(pkt->req->contextId(), pkt);
            }

            return RequestStatus_Issued;
        }
        // NOTE(review): non-kernel requests deliberately fall through to
        // the normal coalescing path below even while writebacks drain;
        // the aliased return is disabled here.
//        return RequestStatus_Aliased;
    } else if (pkt->req->isKernel() && pkt->req->isRelease()) {
        // Flush dirty data on kernel end (isKernel + isRelease).
        insertKernel(pkt->req->contextId(), pkt);
        wbL1();
        // If the walk found nothing to write back (cache was clean),
        // complete the pending kernel ends immediately.
        if (m_outstanding_wb == 0) {
            for (auto it =  kernelEndList.begin(); it != kernelEndList.end(); it++) {
                newKernelEnds.push_back(it->first);
            }
            completeIssue();
        }
        return RequestStatus_Issued;
    }
    RequestStatus requestStatus = GPUCoalescer::makeRequest(pkt);
    if (requestStatus!=RequestStatus_Issued) {
        // Request not issued by the base coalescer;
        // the packet will have to be retried.
        DPRINTF(GPUCoalescer, "Request not issued by GPUCoaleser\n");
        return requestStatus;
    } else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
        // Invalidate clean data on kernel begin
        // (isKernel + isAcquire).
        invL1();
    } else if (pkt->req->isAcquire() && pkt->req->isRelease()) {
        // Deschedule the AtomicAcqRel and
        // flush and invalidate the L1 cache.
        invwbL1();
        // Hold off issuing until the writeback walk drains
        // (wbCallback calls completeIssue when it finishes).
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isRelease()) {
        // Deschedule the StoreRel and
        // flush the L1 cache.
        wbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isAcquire()) {
        // LoadAcq or AtomicAcq:
        // invalidate the L1 cache. Invalidations are not waited on
        // (issue below is gated only on outstanding writebacks).
        invL1();
    }
    // Request was successful; schedule issue now unless writebacks
    // still need to drain first.
    if (m_outstanding_wb == 0) {
        if (!issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Rescheduled\n");
            schedule(issueEvent, curTick());
        }
    }
    return RequestStatus_Issued;
}
168
169void
170VIPERCoalescer::wbCallback(Addr addr)
171{
172    m_outstanding_wb--;
173    // if L1 Flush Complete
174    // attemnpt to schedule issueEvent
175    assert(((int) m_outstanding_wb) >= 0);
176    if (m_outstanding_wb == 0) {
177        for (auto it =  kernelEndList.begin(); it != kernelEndList.end(); it++) {
178            newKernelEnds.push_back(it->first);
179        }
180        completeIssue();
181    }
182    trySendRetries();
183}
184
185void
186VIPERCoalescer::invCallback(Addr addr)
187{
188    m_outstanding_inv--;
189    // if L1 Flush Complete
190    // attemnpt to schedule issueEvent
191    // This probably won't happen, since
192    // we dont wait on cache invalidations
193    if (m_outstanding_wb == 0) {
194        for (auto it =  kernelEndList.begin(); it != kernelEndList.end(); it++) {
195            newKernelEnds.push_back(it->first);
196        }
197        completeIssue();
198    }
199    trySendRetries();
200}
201
202/**
203  * Invalidate L1 cache (Acquire)
204  */
205void
206VIPERCoalescer::invL1()
207{
208    int size = m_dataCache_ptr->getNumBlocks();
209    DPRINTF(GPUCoalescer,
210            "There are %d Invalidations outstanding before Cache Walk\n",
211            m_outstanding_inv);
212    // Walk the cache
213    for (int i = 0; i < size; i++) {
214        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
215        // Evict Read-only data
216        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
217            clockEdge(), addr, (uint8_t*) 0, 0, 0,
218            RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor,
219            nullptr);
220        assert(m_mandatory_q_ptr != NULL);
221        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
222        m_outstanding_inv++;
223    }
224    DPRINTF(GPUCoalescer,
225            "There are %d Invalidatons outstanding after Cache Walk\n",
226            m_outstanding_inv);
227}
228
229/**
230  * Writeback L1 cache (Release)
231  */
232void
233VIPERCoalescer::wbL1()
234{
235    int size = m_dataCache_ptr->getNumBlocks();
236    DPRINTF(GPUCoalescer,
237            "There are %d Writebacks outstanding before Cache Walk\n",
238            m_outstanding_wb);
239    // Walk the cache
240    for (int i = 0; i < size; i++) {
241        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
242        // Write dirty data back
243        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
244            clockEdge(), addr, (uint8_t*) 0, 0, 0,
245            RubyRequestType_FLUSH, RubyAccessMode_Supervisor,
246            nullptr);
247        assert(m_mandatory_q_ptr != NULL);
248        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
249        m_outstanding_wb++;
250    }
251    DPRINTF(GPUCoalescer,
252            "There are %d Writebacks outstanding after Cache Walk\n",
253            m_outstanding_wb);
254}
255
256/**
257  * Invalidate and Writeback L1 cache (Acquire&Release)
258  */
259void
260VIPERCoalescer::invwbL1()
261{
262    int size = m_dataCache_ptr->getNumBlocks();
263    // Walk the cache
264    for (int i = 0; i < size; i++) {
265        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
266        // Evict Read-only data
267        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
268            clockEdge(), addr, (uint8_t*) 0, 0, 0,
269            RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor,
270            nullptr);
271        assert(m_mandatory_q_ptr != NULL);
272        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
273        m_outstanding_inv++;
274    }
275    // Walk the cache
276    for (int i = 0; i< size; i++) {
277        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
278        // Write dirty data back
279        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
280            clockEdge(), addr, (uint8_t*) 0, 0, 0,
281            RubyRequestType_FLUSH, RubyAccessMode_Supervisor,
282            nullptr);
283        assert(m_mandatory_q_ptr != NULL);
284        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
285        m_outstanding_wb++;
286    }
287}
288