/*
 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Sooraj Puthoor
 */

#include "base/logging.hh"
#include "base/str.hh"
#include "config/the_isa.hh"

#if THE_ISA == X86_ISA
#include "arch/x86/insts/microldstop.hh"

#endif // X86_ISA
#include "mem/ruby/system/VIPERCoalescer.hh"

#include "cpu/testers/rubytest/RubyTester.hh"
#include "debug/GPUCoalescer.hh"
#include "debug/MemoryAccess.hh"
#include "mem/packet.hh"
#include "mem/ruby/common/SubBlock.hh"
#include "mem/ruby/network/MessageBuffer.hh"
#include "mem/ruby/profiler/Profiler.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/ruby/slicc_interface/RubyRequest.hh"
#include "mem/ruby/structures/CacheMemory.hh"
#include "mem/ruby/system/GPUCoalescer.hh"
#include "mem/ruby/system/RubySystem.hh"
#include "params/VIPERCoalescer.hh"

using namespace std;

VIPERCoalescer *
VIPERCoalescerParams::create()
{
    return new VIPERCoalescer(this);
}

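// In addition to the base GPUCoalescer state, track how many L1 writebacks
// and invalidations are still outstanding, and cap how many may be issued
// per cycle (taken from the Python parameters).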
VIPERCoalescer::VIPERCoalescer(const Params *p)
    : GPUCoalescer(p)
{
    m_max_wb_per_cycle = p->max_wb_per_cycle;
    m_max_inv_per_cycle = p->max_inv_per_cycle;
    m_outstanding_inv = 0;
    m_outstanding_wb = 0;
}

VIPERCoalescer::~VIPERCoalescer()
{
}

// Analyzes the packet to see if this request can be coalesced.
// If the request can be coalesced, it is added to the reqCoalescer table
// and makeRequest returns RequestStatus_Issued.
// If this is the first request to a cacheline, the request is added to both
// the newRequests queue and the reqCoalescer table; makeRequest
// returns RequestStatus_Issued.
// If there is a pending request to this cacheline and this request
// can't be coalesced, RequestStatus_Aliased is returned and
// the packet needs to be reissued.
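//
// For reference, a minimal caller-side sketch (hypothetical, not code from
// this file): the issuing port simply holds on to a packet that was not
// issued and resends it once the coalescer calls trySendRetries().
//
//     if (coalescer->makeRequest(pkt) != RequestStatus_Issued) {
//         // keep the packet around and retry it after the retry callback
//     }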
RequestStatus
VIPERCoalescer::makeRequest(PacketPtr pkt)
{
    if (m_outstanding_wb | m_outstanding_inv) {
        DPRINTF(GPUCoalescer,
                "There are %d Writebacks and %d Invalidations\n",
                m_outstanding_wb, m_outstanding_inv);
    }
    // Are we in the middle of a release?
    if ((m_outstanding_wb) > 0) {
        if (pkt->req->isKernel()) {
            // Everything is fine
            // Barriers and Kernel End can coalesce
            // If it is a Kernel Begin, flush the cache
            if (pkt->req->isAcquire() && (m_outstanding_inv == 0)) {
                invL1();
            }

            if (pkt->req->isRelease()) {
                insertKernel(pkt->req->contextId(), pkt);
            }

            return RequestStatus_Issued;
        }
//        return RequestStatus_Aliased;
    } else if (pkt->req->isKernel() && pkt->req->isRelease()) {
        // Flush dirty data on Kernel End
        // isKernel + isRelease
        insertKernel(pkt->req->contextId(), pkt);
        wbL1();
        if (m_outstanding_wb == 0) {
            for (auto it = kernelEndList.begin(); it != kernelEndList.end();
                 it++) {
                newKernelEnds.push_back(it->first);
            }
            completeIssue();
        }
        return RequestStatus_Issued;
    }
    RequestStatus requestStatus = GPUCoalescer::makeRequest(pkt);
    if (requestStatus != RequestStatus_Issued) {
        // Request not issued
        // enqueue Retry
        DPRINTF(GPUCoalescer, "Request not issued by GPUCoalescer\n");
        return requestStatus;
    } else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
        // Invalidate clean data on Kernel Begin
        // isKernel + isAcquire
        invL1();
    } else if (pkt->req->isAcquire() && pkt->req->isRelease()) {
        // Deschedule the AtomicAcqRel and
        // flush and invalidate the L1 cache
        invwbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isRelease()) {
        // Deschedule the StoreRel and
        // flush the L1 cache
        wbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isAcquire()) {
        // LoadAcq or AtomicAcq
        // Invalidate the L1 cache
        invL1();
    }
    // Request was successful
    if (m_outstanding_wb == 0) {
        if (!issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Rescheduled\n");
            schedule(issueEvent, curTick());
        }
    }
    return RequestStatus_Issued;
}

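/**
  * Writeback completion callback (Release). When the last outstanding
  * writeback drains, pending kernel ends are issued and stalled packets
  * are retried.
  */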
void
VIPERCoalescer::wbCallback(Addr addr)
{
    m_outstanding_wb--;
    // if L1 Flush Complete
    // attempt to schedule issueEvent
    assert(((int) m_outstanding_wb) >= 0);
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end();
             it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}

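/**
  * Invalidation completion callback (Acquire). Invalidations are not
  * waited on, so this mostly just retries stalled packets.
  */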
void
VIPERCoalescer::invCallback(Addr addr)
{
    m_outstanding_inv--;
    // if L1 Flush Complete
    // attempt to schedule issueEvent
    // This probably won't happen, since
    // we don't wait on cache invalidations
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end();
             it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}

/**
  * Invalidate L1 cache (Acquire)
  */
void
VIPERCoalescer::invL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding before Cache Walk\n",
            m_outstanding_inv);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict Read-only data
        RubyRequestType request_type = RubyRequestType_REPLACEMENT;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
                            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_inv++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding after Cache Walk\n",
            m_outstanding_inv);
}

/**
  * Writeback L1 cache (Release)
  */
void
VIPERCoalescer::wbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding before Cache Walk\n",
            m_outstanding_wb);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        RubyRequestType request_type = RubyRequestType_FLUSH;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
                            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_wb++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding after Cache Walk\n",
            m_outstanding_wb);
}

/**
  * Invalidate and Writeback L1 cache (Acquire&Release)
  */
void
VIPERCoalescer::invwbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict Read-only data
        RubyRequestType request_type = RubyRequestType_REPLACEMENT;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
                            m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_inv++;
    }
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        RubyRequestType request_type = RubyRequestType_FLUSH;
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            request_type, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        Tick latency = cyclesToTicks(
                m_controller->mandatoryQueueLatency(request_type));
        assert(latency > 0);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), latency);
        m_outstanding_wb++;
    }
}