bdi.cc (13944:5000533e6b81) bdi.cc (14269:7e364bd625e1)
1/*
2 * Copyright (c) 2018 Inria
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Daniel Carvalho
29 */
30
31/** @file
32 * Implementation of the BDI cache compressor.
33 */
34
35#include "mem/cache/compressors/bdi.hh"
36
37#include <algorithm>
38#include <climits>
39#include <cstring>
40#include <type_traits>
41
42#include "debug/CacheComp.hh"
43#include "params/BDI.hh"
44
45// Number of bytes in a qword
46#define BYTES_PER_QWORD 8
47
48// Declare BDI encoding names
49const char* BDI::ENCODING_NAMES[] =
50 {"Zero", "Repeated_Values", "Base8_1", "Base8_2", "Base8_4", "Base4_1",
51 "Base4_2", "Base2_1", "Uncompressed"};
52
53BDI::BDICompData::BDICompData(const uint8_t encoding)
54 : CompressionData(), _encoding(encoding)
55{
56}
57
58uint8_t
59BDI::BDICompData::getEncoding() const
60{
61 return _encoding;
62}
63
64std::string
65BDI::BDICompData::getName() const
66{
67 return ENCODING_NAMES[_encoding];
68}
69
70BDI::BDICompDataZeros::BDICompDataZeros()
71 : BDICompData(ZERO)
72{
73 // Calculate compressed size
74 calculateCompressedSize();
75}
76
77uint64_t
78BDI::BDICompDataZeros::access(const int index) const
79{
80 return 0;
81}
82
83void
84BDI::BDICompDataZeros::calculateCompressedSize()
85{
86 // Number of bits used by Encoding
87 std::size_t size = encodingBits;
88
89 setSizeBits(size);
90}
91
92BDI::BDICompDataRep::BDICompDataRep(const uint64_t rep_value)
93 : BDICompData(REP_VALUES)
94{
95 // Set base value
96 base = rep_value;
97
98 // Calculate compressed size
99 calculateCompressedSize();
100}
101
102uint64_t
103BDI::BDICompDataRep::access(const int index) const
104{
105 return base;
106}
107
108void
109BDI::BDICompDataRep::calculateCompressedSize()
110{
111 // Number of bits used by Encoding
112 std::size_t size = encodingBits;
113
114 // Number of bits used by Base
115 size += sizeof(base)*CHAR_BIT;
116
117 setSizeBits(size);
118}
119
120BDI::BDICompDataUncompressed::BDICompDataUncompressed(
121 const uint64_t* data, const std::size_t blk_size)
122 : BDICompData(UNCOMPRESSED), blkSize(blk_size),
123 _data(data, data + blk_size/CHAR_BIT)
124{
125 // Calculate compressed size
126 calculateCompressedSize();
127}
128
129uint64_t
130BDI::BDICompDataUncompressed::access(const int index) const
131{
132 return _data[index];
133}
134
135void
136BDI::BDICompDataUncompressed::calculateCompressedSize()
137{
138 // Number of bits used by Encoding
139 std::size_t size = encodingBits;
140
141 // Number of bits used by uncompressed line
142 size += blkSize*CHAR_BIT;
143
144 setSizeBits(size);
145}
146
147template <class TB, class TD>
148BDI::BDICompDataBaseDelta<TB, TD>::BDICompDataBaseDelta(const uint8_t encoding,
149 const std::size_t blk_size, const std::size_t max_num_bases)
150 : BDICompData(encoding), maxNumBases(max_num_bases)
151{
152 // Reserve the maximum possible size for the vectors
153 bases.reserve(maxNumBases);
154 bitMask.reserve(blk_size/sizeof(TD));
155 deltas.reserve(blk_size/sizeof(TD));
156
157 // Push virtual base 0 to bases list
158 bases.push_back(0);
159}
160
161template <class TB, class TD>
162void
163BDI::BDICompDataBaseDelta<TB, TD>::calculateCompressedSize()
164{
165 // Number of bits used by Encoding
166 std::size_t size = encodingBits;
167
168 // Number of bits used by BitMask
1/*
2 * Copyright (c) 2018 Inria
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Daniel Carvalho
29 */
30
31/** @file
32 * Implementation of the BDI cache compressor.
33 */
34
35#include "mem/cache/compressors/bdi.hh"
36
37#include <algorithm>
38#include <climits>
39#include <cstring>
40#include <type_traits>
41
42#include "debug/CacheComp.hh"
43#include "params/BDI.hh"
44
45// Number of bytes in a qword
46#define BYTES_PER_QWORD 8
47
48// Declare BDI encoding names
49const char* BDI::ENCODING_NAMES[] =
50 {"Zero", "Repeated_Values", "Base8_1", "Base8_2", "Base8_4", "Base4_1",
51 "Base4_2", "Base2_1", "Uncompressed"};
52
53BDI::BDICompData::BDICompData(const uint8_t encoding)
54 : CompressionData(), _encoding(encoding)
55{
56}
57
58uint8_t
59BDI::BDICompData::getEncoding() const
60{
61 return _encoding;
62}
63
64std::string
65BDI::BDICompData::getName() const
66{
67 return ENCODING_NAMES[_encoding];
68}
69
70BDI::BDICompDataZeros::BDICompDataZeros()
71 : BDICompData(ZERO)
72{
73 // Calculate compressed size
74 calculateCompressedSize();
75}
76
77uint64_t
78BDI::BDICompDataZeros::access(const int index) const
79{
80 return 0;
81}
82
83void
84BDI::BDICompDataZeros::calculateCompressedSize()
85{
86 // Number of bits used by Encoding
87 std::size_t size = encodingBits;
88
89 setSizeBits(size);
90}
91
92BDI::BDICompDataRep::BDICompDataRep(const uint64_t rep_value)
93 : BDICompData(REP_VALUES)
94{
95 // Set base value
96 base = rep_value;
97
98 // Calculate compressed size
99 calculateCompressedSize();
100}
101
102uint64_t
103BDI::BDICompDataRep::access(const int index) const
104{
105 return base;
106}
107
108void
109BDI::BDICompDataRep::calculateCompressedSize()
110{
111 // Number of bits used by Encoding
112 std::size_t size = encodingBits;
113
114 // Number of bits used by Base
115 size += sizeof(base)*CHAR_BIT;
116
117 setSizeBits(size);
118}
119
120BDI::BDICompDataUncompressed::BDICompDataUncompressed(
121 const uint64_t* data, const std::size_t blk_size)
122 : BDICompData(UNCOMPRESSED), blkSize(blk_size),
123 _data(data, data + blk_size/CHAR_BIT)
124{
125 // Calculate compressed size
126 calculateCompressedSize();
127}
128
129uint64_t
130BDI::BDICompDataUncompressed::access(const int index) const
131{
132 return _data[index];
133}
134
135void
136BDI::BDICompDataUncompressed::calculateCompressedSize()
137{
138 // Number of bits used by Encoding
139 std::size_t size = encodingBits;
140
141 // Number of bits used by uncompressed line
142 size += blkSize*CHAR_BIT;
143
144 setSizeBits(size);
145}
146
147template <class TB, class TD>
148BDI::BDICompDataBaseDelta<TB, TD>::BDICompDataBaseDelta(const uint8_t encoding,
149 const std::size_t blk_size, const std::size_t max_num_bases)
150 : BDICompData(encoding), maxNumBases(max_num_bases)
151{
152 // Reserve the maximum possible size for the vectors
153 bases.reserve(maxNumBases);
154 bitMask.reserve(blk_size/sizeof(TD));
155 deltas.reserve(blk_size/sizeof(TD));
156
157 // Push virtual base 0 to bases list
158 bases.push_back(0);
159}
160
161template <class TB, class TD>
162void
163BDI::BDICompDataBaseDelta<TB, TD>::calculateCompressedSize()
164{
165 // Number of bits used by Encoding
166 std::size_t size = encodingBits;
167
168 // Number of bits used by BitMask
169 size += bitMask.size()*std::ceil(std::log2(bases.size()));
169 size += bitMask.size()*std::ceil(std::log2(maxNumBases));
170
171 // Number of bits used by Bases. bases[0] is implicit in a hardware
172 // implementation, therefore its size is 0
173 size += (maxNumBases-1)*sizeof(TB)*CHAR_BIT;
174
175 // Number of bits used by Deltas
176 size += deltas.size()*sizeof(TD)*CHAR_BIT;
177
178 CompressionData::setSizeBits(size);
179}
180
181template <class TB, class TD>
182bool
183BDI::BDICompDataBaseDelta<TB, TD>::addBase(const TB base)
184{
185 // Can't add base if reached limit of number of bases
186 if (bases.size() >= maxNumBases) {
187 return false;
188 }
189
190 // Push new base to end of bases list
191 bases.push_back(base);
192
193 // New delta is 0, as it is a difference between the new base and itself
194 addDelta(bases.size() - 1, 0);
195
196 return true;
197}
198
199template <class TB, class TD>
200void
201BDI::BDICompDataBaseDelta<TB, TD>::addDelta(const std::size_t base_index,
202 const TD delta)
203{
204 // Insert new delta with respect to the given base
205 bitMask.push_back(base_index);
206
207 // Insert new delta
208 deltas.push_back(delta);
209}
210
211template <class TB, class TD> bool
212BDI::BDICompDataBaseDelta<TB, TD>::compress(const uint64_t* data,
213 const std::size_t blk_size)
214{
215 // Parse through data in a sizeof(TB) granularity
216 for (std::size_t byte_start = 0; byte_start < blk_size;
217 byte_start += sizeof(TB))
218 {
219 // Get current value
220 TB curValue;
221 std::memcpy(&curValue, ((uint8_t*)data) + byte_start,
222 sizeof(TB));
223
224 // Iterate through all bases to search for a valid delta
225 bool foundDelta = false;
226 for (int base_index = 0; base_index < bases.size(); base_index++) {
227 // Calculate delta relative to currently parsed base
228 typename std::make_signed<TB>::type delta = curValue -
229 bases[base_index];
230
231 // Check if the delta is within the limits of the delta size. If
232 // that is the case, add delta to compressed data and keep parsing
233 // the input data
234 typename std::make_signed<TB>::type limit =
235 ULLONG_MAX>>((BYTES_PER_QWORD-sizeof(TD))*CHAR_BIT+1);
236 if ((delta >= -limit) && (delta <= limit)) {
237 addDelta(base_index, delta);
238 foundDelta = true;
239 break;
240 }
241 }
242
243 // If we cannot find a base that can accommodate this new line's data,
244 // add this value as the new base and insert its respective delta of 0.
245 // If the new base can't be added, it means that we reached the base
246 // limit, so line is uncompressible using the given encoding
247 if (!foundDelta && !addBase(curValue)) {
248 return false;
249 }
250 }
251
252 // Calculate compressed size
253 calculateCompressedSize();
254
255 return true;
256}
257
258template <class TB, class TD>
259uint64_t
260BDI::BDICompDataBaseDelta<TB, TD>::access(const int index) const
261{
262 // We decompress all base-delta pairs that form the 64-bit entry
263 // corresponding to the given 64-bit-array index
264 uint64_t value = 0;
265
266 // Get relationship between the size of an uint64_t base and size of TB
267 const std::size_t size_diff = sizeof(uint64_t)/sizeof(TB);
268
269 // Mask for a base entry
270 const uint64_t mask = ULLONG_MAX>>((BYTES_PER_QWORD-sizeof(TB))*CHAR_BIT);
271
272 // Size, in bits, of a base entry. Cant be const because compiler will
273 // optimize and create a 64 bit instance, which will generate a shift size
274 // compilation error
275 int base_size_bits = sizeof(TB)*CHAR_BIT;
276
277 // Concatenate all base-delta entries until they form a 64-bit entry
278 for (int delta_index = size_diff * (index + 1) - 1;
279 delta_index >= (int)(size_diff * index); delta_index--) {
280 // Get base and delta entries corresponding to the current delta
281 assert(delta_index < deltas.size());
282 const TD delta = deltas[delta_index];
283 const int base_index = bitMask[delta_index];
284 assert(base_index < bases.size());
285 const TB base = bases[base_index];
286
287 // Concatenate decompressed value
288 value <<= base_size_bits;
289 value |= static_cast<uint64_t>((base + delta) & mask);
290 }
291
292 return value;
293}
294
295BDI::BDI(const Params *p)
296 : BaseCacheCompressor(p), useMoreCompressors(p->use_more_compressors),
297 qwordsPerCacheLine(blkSize/BYTES_PER_QWORD)
298{
299 static_assert(sizeof(ENCODING_NAMES)/sizeof(char*) == NUM_ENCODINGS,
300 "Number of encodings doesn't match the number of names");
301}
302
303bool
304BDI::isZeroPackable(const uint64_t* data) const
305{
306 return std::all_of(data, data + qwordsPerCacheLine,
307 [](const uint64_t entry){ return entry == 0; });
308}
309
310bool
311BDI::isSameValuePackable(const uint64_t* data) const
312{
313 // We don't want to copy the whole array to the lambda expression
314 const uint64_t rep_value = data[0];
315 return std::all_of(data, data + qwordsPerCacheLine,
316 [rep_value](const uint64_t entry)
317 {return entry == rep_value;});
318}
319
320template <class TB, class TD>
321std::unique_ptr<BDI::BDICompData>
322BDI::tryCompress(const uint64_t* data, const uint8_t encoding) const
323{
324 // Instantiate compressor
325 auto temp_data = std::unique_ptr<BDICompDataBaseDelta<TB, TD>>(
326 new BDICompDataBaseDelta<TB, TD>(encoding, blkSize));
327
328 // Try compressing. Return nullptr if compressor can't compress given line
329 if (temp_data->compress(data, blkSize)) {
330 return std::move(temp_data);
331 } else {
332 return std::unique_ptr<BDICompData>{};
333 }
334}
335
336void
337BDI::decompress(const BaseCacheCompressor::CompressionData* comp_data,
338 uint64_t* data)
339{
340 // Decompress and go back to host endianness
341 for (std::size_t i = 0; i < qwordsPerCacheLine; i++)
342 data[i] = static_cast<const BDICompData*>(comp_data)->access(i);
343}
344
345std::unique_ptr<BaseCacheCompressor::CompressionData>
346BDI::compress(const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat)
347{
348 std::unique_ptr<BDICompData> bdi_data;
349
350 // Check if it is a zero line
351 if (isZeroPackable(data)) {
352 bdi_data = std::unique_ptr<BDICompData>(new BDICompDataZeros());
353
354 // Set compression latency (compare 1 qword per cycle)
355 comp_lat = Cycles(qwordsPerCacheLine);
356 // Check if all values in the line are the same
357 } else if (isSameValuePackable(data)) {
358 bdi_data = std::unique_ptr<BDICompData>(new BDICompDataRep(data[0]));
359
360 // Set compression latency (compare 1 qword per cycle)
361 comp_lat = Cycles(qwordsPerCacheLine);
362 } else {
363 // Initialize compressed data as an uncompressed instance
364 bdi_data = std::unique_ptr<BDICompData>(
365 new BDICompDataUncompressed(data, blkSize));
366
367 // Base size-delta size ratio. Used to optimize run and try to
368 // compress less combinations when their size is worse than the
369 // current best. It does not reflect the compression ratio, as
370 // it does not take the metadata into account.
371 int base_delta_ratio = 2;
372
373 // Check which base-delta size combination is the best. This is
374 // optimized by giving priority to trying the compressor that would
375 // generate the smallest compression size. This way we waste less
376 // simulation time by not doing all possible combinations
377 for (int ratio = 8; ratio >= base_delta_ratio; ratio/=2) {
378 for (int base_size = 8; base_size >= ratio; base_size/=2) {
379 // If using more compressors, parse all delta sizes from 1 to
380 // one size smaller than the base size, otherwise just parse
381 // highest possible delta. When we only instantiate one delta
382 // size per base size, we use less area and energy, at the
383 // cost of lower compression efficiency
384 const int delta_size = base_size/ratio;
385 if (!useMoreCompressors && delta_size != base_size/2) {
386 continue;
387 }
388
389 std::unique_ptr<BDICompData> temp_bdi_data;
390
391 // Get the compression result for the current combination
392 if ((base_size == 8)&&(delta_size == 4)) {
393 temp_bdi_data = tryCompress<uint64_t, int32_t>(data,
394 BASE8_4);
395 } else if ((base_size == 8)&&(delta_size == 2)) {
396 temp_bdi_data = tryCompress<uint64_t, int16_t>(data,
397 BASE8_2);
398 } else if ((base_size == 8)&&(delta_size == 1)) {
399 temp_bdi_data = tryCompress<uint64_t, int8_t>(data,
400 BASE8_1);
401 } else if ((base_size == 4)&&(delta_size == 2)) {
402 temp_bdi_data = tryCompress<uint32_t, int16_t>(data,
403 BASE4_2);
404 } else if ((base_size == 4)&&(delta_size == 1)) {
405 temp_bdi_data = tryCompress<uint32_t, int8_t>(data,
406 BASE4_1);
407 } else if ((base_size == 2)&&(delta_size == 1)) {
408 temp_bdi_data = tryCompress<uint16_t, int8_t>(data,
409 BASE2_1);
410 } else {
411 fatal("Invalid combination of base and delta sizes.");
412 }
413
414 // If compressor was successful, check if new compression
415 // improves the compression factor
416 if (temp_bdi_data &&
417 (bdi_data->getSizeBits() > temp_bdi_data->getSizeBits()))
418 {
419 bdi_data = std::move(temp_bdi_data);
420 base_delta_ratio = ratio;
421 }
422
423 // Clear temp pointer
424 temp_bdi_data.reset(nullptr);
425 }
426 }
427
428 // Set compression latency. A successful compressor will stop all
429 // other compressors when it knows no other will generate a better
430 // compression. However, this requires either hard-coding, or a complex
431 // function that can calculate the exact size of every compressor for
432 // every cache line size. We decide to take a conservative
433 // optimization: assume that all compressors with a given base size
434 // delta size ratio (no-metadata ratio) must wait for each other.
435 // In the worst case scenario the data is left uncompressed, so
436 // it loses the time of the worst base size (2 bytes per cycle)
437 comp_lat = Cycles(blkSize/base_delta_ratio);
438 }
439
440 // Update stats
441 encodingStats[bdi_data->getEncoding()]++;
442
443 // Pack compression results (1 extra cycle)
444 comp_lat += Cycles(1);
445
446 // Set decompression latency (latency of an adder)
447 decomp_lat = Cycles(1);
448
449 // Print debug information
450 DPRINTF(CacheComp, "BDI: Compressed cache line to encoding %s (%d bits)\n",
451 bdi_data->getName(), bdi_data->getSizeBits());
452
453 return std::move(bdi_data);
454}
455
456void
457BDI::regStats()
458{
459 BaseCacheCompressor::regStats();
460
461 // We store the frequency of each encoding
462 encodingStats
463 .init(NUM_ENCODINGS)
464 .name(name() + ".encoding")
465 .desc("Number of data entries that were compressed to this encoding.")
466 ;
467
468 for (unsigned i = 0; i < NUM_ENCODINGS; ++i) {
469 encodingStats.subname(i, ENCODING_NAMES[i]);
470 encodingStats.subdesc(i, "Number of data entries that match " \
471 "encoding " + std::string(ENCODING_NAMES[i]));
472 }
473}
474
475BDI*
476BDIParams::create()
477{
478 return new BDI(this);
479}
480
481#undef BYTES_PER_QWORD
170
171 // Number of bits used by Bases. bases[0] is implicit in a hardware
172 // implementation, therefore its size is 0
173 size += (maxNumBases-1)*sizeof(TB)*CHAR_BIT;
174
175 // Number of bits used by Deltas
176 size += deltas.size()*sizeof(TD)*CHAR_BIT;
177
178 CompressionData::setSizeBits(size);
179}
180
181template <class TB, class TD>
182bool
183BDI::BDICompDataBaseDelta<TB, TD>::addBase(const TB base)
184{
185 // Can't add base if reached limit of number of bases
186 if (bases.size() >= maxNumBases) {
187 return false;
188 }
189
190 // Push new base to end of bases list
191 bases.push_back(base);
192
193 // New delta is 0, as it is a difference between the new base and itself
194 addDelta(bases.size() - 1, 0);
195
196 return true;
197}
198
199template <class TB, class TD>
200void
201BDI::BDICompDataBaseDelta<TB, TD>::addDelta(const std::size_t base_index,
202 const TD delta)
203{
204 // Insert new delta with respect to the given base
205 bitMask.push_back(base_index);
206
207 // Insert new delta
208 deltas.push_back(delta);
209}
210
211template <class TB, class TD> bool
212BDI::BDICompDataBaseDelta<TB, TD>::compress(const uint64_t* data,
213 const std::size_t blk_size)
214{
215 // Parse through data in a sizeof(TB) granularity
216 for (std::size_t byte_start = 0; byte_start < blk_size;
217 byte_start += sizeof(TB))
218 {
219 // Get current value
220 TB curValue;
221 std::memcpy(&curValue, ((uint8_t*)data) + byte_start,
222 sizeof(TB));
223
224 // Iterate through all bases to search for a valid delta
225 bool foundDelta = false;
226 for (int base_index = 0; base_index < bases.size(); base_index++) {
227 // Calculate delta relative to currently parsed base
228 typename std::make_signed<TB>::type delta = curValue -
229 bases[base_index];
230
231 // Check if the delta is within the limits of the delta size. If
232 // that is the case, add delta to compressed data and keep parsing
233 // the input data
234 typename std::make_signed<TB>::type limit =
235 ULLONG_MAX>>((BYTES_PER_QWORD-sizeof(TD))*CHAR_BIT+1);
236 if ((delta >= -limit) && (delta <= limit)) {
237 addDelta(base_index, delta);
238 foundDelta = true;
239 break;
240 }
241 }
242
243 // If we cannot find a base that can accommodate this new line's data,
244 // add this value as the new base and insert its respective delta of 0.
245 // If the new base can't be added, it means that we reached the base
246 // limit, so line is uncompressible using the given encoding
247 if (!foundDelta && !addBase(curValue)) {
248 return false;
249 }
250 }
251
252 // Calculate compressed size
253 calculateCompressedSize();
254
255 return true;
256}
257
258template <class TB, class TD>
259uint64_t
260BDI::BDICompDataBaseDelta<TB, TD>::access(const int index) const
261{
262 // We decompress all base-delta pairs that form the 64-bit entry
263 // corresponding to the given 64-bit-array index
264 uint64_t value = 0;
265
266 // Get relationship between the size of an uint64_t base and size of TB
267 const std::size_t size_diff = sizeof(uint64_t)/sizeof(TB);
268
269 // Mask for a base entry
270 const uint64_t mask = ULLONG_MAX>>((BYTES_PER_QWORD-sizeof(TB))*CHAR_BIT);
271
272 // Size, in bits, of a base entry. Cant be const because compiler will
273 // optimize and create a 64 bit instance, which will generate a shift size
274 // compilation error
275 int base_size_bits = sizeof(TB)*CHAR_BIT;
276
277 // Concatenate all base-delta entries until they form a 64-bit entry
278 for (int delta_index = size_diff * (index + 1) - 1;
279 delta_index >= (int)(size_diff * index); delta_index--) {
280 // Get base and delta entries corresponding to the current delta
281 assert(delta_index < deltas.size());
282 const TD delta = deltas[delta_index];
283 const int base_index = bitMask[delta_index];
284 assert(base_index < bases.size());
285 const TB base = bases[base_index];
286
287 // Concatenate decompressed value
288 value <<= base_size_bits;
289 value |= static_cast<uint64_t>((base + delta) & mask);
290 }
291
292 return value;
293}
294
295BDI::BDI(const Params *p)
296 : BaseCacheCompressor(p), useMoreCompressors(p->use_more_compressors),
297 qwordsPerCacheLine(blkSize/BYTES_PER_QWORD)
298{
299 static_assert(sizeof(ENCODING_NAMES)/sizeof(char*) == NUM_ENCODINGS,
300 "Number of encodings doesn't match the number of names");
301}
302
303bool
304BDI::isZeroPackable(const uint64_t* data) const
305{
306 return std::all_of(data, data + qwordsPerCacheLine,
307 [](const uint64_t entry){ return entry == 0; });
308}
309
310bool
311BDI::isSameValuePackable(const uint64_t* data) const
312{
313 // We don't want to copy the whole array to the lambda expression
314 const uint64_t rep_value = data[0];
315 return std::all_of(data, data + qwordsPerCacheLine,
316 [rep_value](const uint64_t entry)
317 {return entry == rep_value;});
318}
319
320template <class TB, class TD>
321std::unique_ptr<BDI::BDICompData>
322BDI::tryCompress(const uint64_t* data, const uint8_t encoding) const
323{
324 // Instantiate compressor
325 auto temp_data = std::unique_ptr<BDICompDataBaseDelta<TB, TD>>(
326 new BDICompDataBaseDelta<TB, TD>(encoding, blkSize));
327
328 // Try compressing. Return nullptr if compressor can't compress given line
329 if (temp_data->compress(data, blkSize)) {
330 return std::move(temp_data);
331 } else {
332 return std::unique_ptr<BDICompData>{};
333 }
334}
335
336void
337BDI::decompress(const BaseCacheCompressor::CompressionData* comp_data,
338 uint64_t* data)
339{
340 // Decompress and go back to host endianness
341 for (std::size_t i = 0; i < qwordsPerCacheLine; i++)
342 data[i] = static_cast<const BDICompData*>(comp_data)->access(i);
343}
344
345std::unique_ptr<BaseCacheCompressor::CompressionData>
346BDI::compress(const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat)
347{
348 std::unique_ptr<BDICompData> bdi_data;
349
350 // Check if it is a zero line
351 if (isZeroPackable(data)) {
352 bdi_data = std::unique_ptr<BDICompData>(new BDICompDataZeros());
353
354 // Set compression latency (compare 1 qword per cycle)
355 comp_lat = Cycles(qwordsPerCacheLine);
356 // Check if all values in the line are the same
357 } else if (isSameValuePackable(data)) {
358 bdi_data = std::unique_ptr<BDICompData>(new BDICompDataRep(data[0]));
359
360 // Set compression latency (compare 1 qword per cycle)
361 comp_lat = Cycles(qwordsPerCacheLine);
362 } else {
363 // Initialize compressed data as an uncompressed instance
364 bdi_data = std::unique_ptr<BDICompData>(
365 new BDICompDataUncompressed(data, blkSize));
366
367 // Base size-delta size ratio. Used to optimize run and try to
368 // compress less combinations when their size is worse than the
369 // current best. It does not reflect the compression ratio, as
370 // it does not take the metadata into account.
371 int base_delta_ratio = 2;
372
373 // Check which base-delta size combination is the best. This is
374 // optimized by giving priority to trying the compressor that would
375 // generate the smallest compression size. This way we waste less
376 // simulation time by not doing all possible combinations
377 for (int ratio = 8; ratio >= base_delta_ratio; ratio/=2) {
378 for (int base_size = 8; base_size >= ratio; base_size/=2) {
379 // If using more compressors, parse all delta sizes from 1 to
380 // one size smaller than the base size, otherwise just parse
381 // highest possible delta. When we only instantiate one delta
382 // size per base size, we use less area and energy, at the
383 // cost of lower compression efficiency
384 const int delta_size = base_size/ratio;
385 if (!useMoreCompressors && delta_size != base_size/2) {
386 continue;
387 }
388
389 std::unique_ptr<BDICompData> temp_bdi_data;
390
391 // Get the compression result for the current combination
392 if ((base_size == 8)&&(delta_size == 4)) {
393 temp_bdi_data = tryCompress<uint64_t, int32_t>(data,
394 BASE8_4);
395 } else if ((base_size == 8)&&(delta_size == 2)) {
396 temp_bdi_data = tryCompress<uint64_t, int16_t>(data,
397 BASE8_2);
398 } else if ((base_size == 8)&&(delta_size == 1)) {
399 temp_bdi_data = tryCompress<uint64_t, int8_t>(data,
400 BASE8_1);
401 } else if ((base_size == 4)&&(delta_size == 2)) {
402 temp_bdi_data = tryCompress<uint32_t, int16_t>(data,
403 BASE4_2);
404 } else if ((base_size == 4)&&(delta_size == 1)) {
405 temp_bdi_data = tryCompress<uint32_t, int8_t>(data,
406 BASE4_1);
407 } else if ((base_size == 2)&&(delta_size == 1)) {
408 temp_bdi_data = tryCompress<uint16_t, int8_t>(data,
409 BASE2_1);
410 } else {
411 fatal("Invalid combination of base and delta sizes.");
412 }
413
414 // If compressor was successful, check if new compression
415 // improves the compression factor
416 if (temp_bdi_data &&
417 (bdi_data->getSizeBits() > temp_bdi_data->getSizeBits()))
418 {
419 bdi_data = std::move(temp_bdi_data);
420 base_delta_ratio = ratio;
421 }
422
423 // Clear temp pointer
424 temp_bdi_data.reset(nullptr);
425 }
426 }
427
428 // Set compression latency. A successful compressor will stop all
429 // other compressors when it knows no other will generate a better
430 // compression. However, this requires either hard-coding, or a complex
431 // function that can calculate the exact size of every compressor for
432 // every cache line size. We decide to take a conservative
433 // optimization: assume that all compressors with a given base size
434 // delta size ratio (no-metadata ratio) must wait for each other.
435 // In the worst case scenario the data is left uncompressed, so
436 // it loses the time of the worst base size (2 bytes per cycle)
437 comp_lat = Cycles(blkSize/base_delta_ratio);
438 }
439
440 // Update stats
441 encodingStats[bdi_data->getEncoding()]++;
442
443 // Pack compression results (1 extra cycle)
444 comp_lat += Cycles(1);
445
446 // Set decompression latency (latency of an adder)
447 decomp_lat = Cycles(1);
448
449 // Print debug information
450 DPRINTF(CacheComp, "BDI: Compressed cache line to encoding %s (%d bits)\n",
451 bdi_data->getName(), bdi_data->getSizeBits());
452
453 return std::move(bdi_data);
454}
455
456void
457BDI::regStats()
458{
459 BaseCacheCompressor::regStats();
460
461 // We store the frequency of each encoding
462 encodingStats
463 .init(NUM_ENCODINGS)
464 .name(name() + ".encoding")
465 .desc("Number of data entries that were compressed to this encoding.")
466 ;
467
468 for (unsigned i = 0; i < NUM_ENCODINGS; ++i) {
469 encodingStats.subname(i, ENCODING_NAMES[i]);
470 encodingStats.subdesc(i, "Number of data entries that match " \
471 "encoding " + std::string(ENCODING_NAMES[i]));
472 }
473}
474
475BDI*
476BDIParams::create()
477{
478 return new BDI(this);
479}
480
481#undef BYTES_PER_QWORD