VIPERCoalescer.cc (11321:02e930db812d) VIPERCoalescer.cc (12334:e0ab29a34764)
/*
 * Copyright (c) 2013-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Sooraj Puthoor
 */

36#include "base/misc.hh"
36#include "base/logging.hh"
37#include "base/str.hh"
38#include "config/the_isa.hh"
39
40#if THE_ISA == X86_ISA
41#include "arch/x86/insts/microldstop.hh"
42
43#endif // X86_ISA
44#include "mem/ruby/system/VIPERCoalescer.hh"
45
46#include "cpu/testers/rubytest/RubyTester.hh"
47#include "debug/GPUCoalescer.hh"
48#include "debug/MemoryAccess.hh"
49#include "mem/packet.hh"
50#include "mem/ruby/common/SubBlock.hh"
51#include "mem/ruby/network/MessageBuffer.hh"
52#include "mem/ruby/profiler/Profiler.hh"
53#include "mem/ruby/slicc_interface/AbstractController.hh"
54#include "mem/ruby/slicc_interface/RubyRequest.hh"
55#include "mem/ruby/structures/CacheMemory.hh"
56#include "mem/ruby/system/GPUCoalescer.hh"
57#include "mem/ruby/system/RubySystem.hh"
58#include "params/VIPERCoalescer.hh"
59
using namespace std;

VIPERCoalescer *
VIPERCoalescerParams::create()
{
    return new VIPERCoalescer(this);
}

VIPERCoalescer::VIPERCoalescer(const Params *p)
    : GPUCoalescer(p)
{
    m_max_wb_per_cycle = p->max_wb_per_cycle;
    m_max_inv_per_cycle = p->max_inv_per_cycle;
    m_outstanding_inv = 0;
    m_outstanding_wb = 0;
}

VIPERCoalescer::~VIPERCoalescer()
{
}

// Analyzes the packet to see if this request can be coalesced.
// If the request can be coalesced, it is added to the reqCoalescer table
// and makeRequest returns RequestStatus_Issued.
// If this is the first request to a cacheline, the request is added to both
// the newRequests queue and the reqCoalescer table; makeRequest
// returns RequestStatus_Issued.
// If there is a pending request to this cacheline and this request
// can't be coalesced, RequestStatus_Aliased is returned and
// the packet needs to be reissued.
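// In addition to coalescing, makeRequest implements the acquire/release
// behavior of the VIPER protocol as seen in the code below: acquire
// requests invalidate the L1 (invL1), release requests write dirty L1
// data back (wbL1), and acquire-release atomics do both (invwbL1).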
RequestStatus
VIPERCoalescer::makeRequest(PacketPtr pkt)
{
    if (m_outstanding_wb | m_outstanding_inv) {
        DPRINTF(GPUCoalescer,
                "There are %d Writebacks and %d Invalidations\n",
                m_outstanding_wb, m_outstanding_inv);
    }
    // Are we in the middle of a release?
    if ((m_outstanding_wb) > 0) {
        if (pkt->req->isKernel()) {
            // Everything is fine.
            // Barriers and Kernel End can coalesce.
            // If it is a Kernel Begin, flush the cache.
            if (pkt->req->isAcquire() && (m_outstanding_inv == 0)) {
                invL1();
            }

            if (pkt->req->isRelease()) {
                insertKernel(pkt->req->contextId(), pkt);
            }

            return RequestStatus_Issued;
        }
//        return RequestStatus_Aliased;
    } else if (pkt->req->isKernel() && pkt->req->isRelease()) {
        // Flush dirty data on Kernel End
        // isKernel + isRelease
        insertKernel(pkt->req->contextId(), pkt);
        wbL1();
        if (m_outstanding_wb == 0) {
            for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
                newKernelEnds.push_back(it->first);
            }
            completeIssue();
        }
        return RequestStatus_Issued;
    }
    RequestStatus requestStatus = GPUCoalescer::makeRequest(pkt);
    if (requestStatus != RequestStatus_Issued) {
        // Request not issued
        // enqueue Retry
        DPRINTF(GPUCoalescer, "Request not issued by GPUCoalescer\n");
        return requestStatus;
    } else if (pkt->req->isKernel() && pkt->req->isAcquire()) {
        // Invalidate clean data on Kernel Begin
        // isKernel + isAcquire
        invL1();
    } else if (pkt->req->isAcquire() && pkt->req->isRelease()) {
        // Deschedule the AtomicAcqRel and
        // flush and invalidate the L1 cache
        invwbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isRelease()) {
        // Deschedule the StoreRel and
        // flush the L1 cache
        wbL1();
        if (m_outstanding_wb > 0 && issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Descheduled\n");
            deschedule(issueEvent);
        }
    } else if (pkt->req->isAcquire()) {
        // LoadAcq or AtomicAcq
        // Invalidate the L1 cache
        invL1();
    }
    // Request was successful
    if (m_outstanding_wb == 0) {
        if (!issueEvent.scheduled()) {
            DPRINTF(GPUCoalescer, "issueEvent Rescheduled\n");
            schedule(issueEvent, curTick());
        }
    }
    return RequestStatus_Issued;
}

void
VIPERCoalescer::wbCallback(Addr addr)
{
    m_outstanding_wb--;
    // If the L1 flush is complete,
    // attempt to schedule the issueEvent
    assert(((int) m_outstanding_wb) >= 0);
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}

void
VIPERCoalescer::invCallback(Addr addr)
{
    m_outstanding_inv--;
    // If the L1 flush is complete,
    // attempt to schedule the issueEvent.
    // This probably won't happen, since
    // we don't wait on cache invalidations.
    if (m_outstanding_wb == 0) {
        for (auto it = kernelEndList.begin(); it != kernelEndList.end(); it++) {
            newKernelEnds.push_back(it->first);
        }
        completeIssue();
    }
    trySendRetries();
}

/**
 * Invalidate L1 cache (Acquire)
 */
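// Note: a REPLACEMENT request is enqueued on the mandatory queue for each
// block index returned by the cache walk; each request is counted in
// m_outstanding_inv and acknowledged through invCallback().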
void
VIPERCoalescer::invL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding before Cache Walk\n",
            m_outstanding_inv);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict read-only data
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
        m_outstanding_inv++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Invalidations outstanding after Cache Walk\n",
            m_outstanding_inv);
}

/**
 * Writeback L1 cache (Release)
 */
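// Note: a FLUSH request is enqueued on the mandatory queue for each block
// index in the L1 data cache; completions are counted down in wbCallback(),
// which calls completeIssue() for queued kernel-end packets once
// m_outstanding_wb reaches zero.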
void
VIPERCoalescer::wbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding before Cache Walk\n",
            m_outstanding_wb);
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            RubyRequestType_FLUSH, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
        m_outstanding_wb++;
    }
    DPRINTF(GPUCoalescer,
            "There are %d Writebacks outstanding after Cache Walk\n",
            m_outstanding_wb);
}

/**
 * Invalidate and Writeback L1 cache (Acquire&Release)
 */
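// Note: the cache is walked twice. The first pass enqueues REPLACEMENT
// requests to drop clean (read-only) blocks, and the second pass enqueues
// FLUSH requests to write dirty blocks back, mirroring invL1() and wbL1().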
void
VIPERCoalescer::invwbL1()
{
    int size = m_dataCache_ptr->getNumBlocks();
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Evict read-only data
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
        m_outstanding_inv++;
    }
    // Walk the cache
    for (int i = 0; i < size; i++) {
        Addr addr = m_dataCache_ptr->getAddressAtIdx(i);
        // Write dirty data back
        std::shared_ptr<RubyRequest> msg = std::make_shared<RubyRequest>(
            clockEdge(), addr, (uint8_t*) 0, 0, 0,
            RubyRequestType_FLUSH, RubyAccessMode_Supervisor,
            nullptr);
        assert(m_mandatory_q_ptr != NULL);
        m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency);
        m_outstanding_wb++;
    }
}