1/* 2 * Copyright (c) 2018 Inria 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * Authors: Daniel Carvalho 29 */ 30 31/** @file 32 * Definition of CPack compression, from "C-Pack: A High-Performance 33 * Microprocessor Cache Compression Algorithm". 34 * 35 * The dictionary is composed of 32-bit entries. 36 * 37 * The patterns are implemented as individual classes that have a checking 38 * function isPattern(), to determine if the data fits the pattern, and a 39 * decompress() function, which decompresses the contents of a pattern. 40 * Every new pattern must inherit from the Pattern class and be added to the 41 * patternFactory. 42 */ 43 44#ifndef __MEM_CACHE_COMPRESSORS_CPACK_HH__ 45#define __MEM_CACHE_COMPRESSORS_CPACK_HH__ 46 47#include <array> 48#include <cstdint> 49#include <map> 50#include <memory> 51#include <vector> 52 53#include "base/types.hh" 54#include "mem/cache/compressors/base.hh" 55 56struct CPackParams; 57 58class CPack : public BaseCacheCompressor 59{ 60 private: 61 /** 62 * Compression data for CPack. It consists of a vector of patterns. 63 */ 64 class CompData; 65 66 // Forward declaration of all possible patterns 67 class Pattern; 68 class PatternZZZZ; 69 class PatternXXXX; 70 class PatternMMMM; 71 class PatternMMXX; 72 class PatternZZZX; 73 class PatternMMMX; 74 75 /** 76 * Create a factory to determine if input matches a pattern. The if else 77 * chains are constructed by recursion. The patterns should be explored 78 * sorted by size for correct behaviour. 79 */ 80 template <class Head, class... Tail> 81 struct Factory 82 { 83 static std::unique_ptr<Pattern> getPattern( 84 const std::array<uint8_t, 4>& bytes, 85 const std::array<uint8_t, 4>& dict_bytes, const int match_location) 86 { 87 // If match this pattern, instantiate it. If a negative match 88 // location is used, the patterns that use the dictionary bytes 89 // must return false. This is used when there are no dictionary 90 // entries yet 91 if (Head::isPattern(bytes, dict_bytes, match_location)) { 92 return std::unique_ptr<Pattern>( 93 new Head(bytes, match_location)); 94 // Otherwise, go for next pattern 95 } else { 96 return Factory<Tail...>::getPattern(bytes, dict_bytes, 97 match_location); 98 } 99 } 100 }; 101 102 /** 103 * Specialization to end the recursion. 104 */ 105 template <class Head> 106 struct Factory<Head> 107 { 108 static std::unique_ptr<Pattern> getPattern( 109 const std::array<uint8_t, 4>& bytes, 110 const std::array<uint8_t, 4>& dict_bytes, const int match_location) 111 { 112 // Instantiate last pattern. Should be the XXXX pattern. 113 return std::unique_ptr<Pattern>(new Head(bytes, match_location)); 114 } 115 }; 116 117 /** 118 * Convenience factory declaration. The templates must be organized by 119 * size, with the smallest first, and "no-match" last. 120 */ 121 using PatternFactory = Factory<PatternZZZZ, PatternMMMM, PatternZZZX, 122 PatternMMMX, PatternMMXX, PatternXXXX>; 123 124 /** 125 * The dictionary. 126 */ 127 std::vector<std::array<uint8_t, 4>> dictionary; 128 129 /** 130 * Dictionary size. 131 */ 132 const std::size_t dictionarySize; 133 134 /** 135 * Number of valid entries in the dictionary. 136 */ 137 std::size_t numEntries; 138 139 /** 140 * @defgroup CompressionStats Compression specific statistics. 141 * @{ 142 */ 143 144 /** 145 * Number of data entries that were compressed to each pattern. 146 */ 147 Stats::Vector patternStats; 148 149 /** 150 * @} 151 */ 152 153 /** 154 * Compress data. 155 * 156 * @param data Data to be compressed. 157 * @return The pattern this data matches. 158 */ 159 std::unique_ptr<Pattern> compressWord(const uint32_t data); 160 161 /** 162 * Decompress a word. 163 * 164 * @param pattern The pattern to be decompressed. 165 * @return The decompressed word. 166 */ 167 uint32_t decompressWord(const Pattern* pattern); 168 169 /** 170 * Clear all dictionary entries. 171 */ 172 void resetDictionary(); 173 174 /** 175 * Apply compression. 176 * 177 * @param data The cache line to be compressed. 178 * @param comp_lat Compression latency in number of cycles. 179 * @param decomp_lat Decompression latency in number of cycles. 180 * @return Cache line after compression. 181 */ 182 std::unique_ptr<BaseCacheCompressor::CompressionData> compress( 183 const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat) override; 184 185 /** 186 * Decompress data. 187 * 188 * @param comp_data Compressed cache line. 189 * @param data The cache line to be decompressed. 190 */ 191 void decompress(const CompressionData* comp_data, uint64_t* data) override; 192 193 public: 194 /** Convenience typedef. */ 195 typedef CPackParams Params; 196 197 /** 198 * Default constructor. 199 */ 200 CPack(const Params *p); 201 202 /** 203 * Default destructor. 204 */ 205 ~CPack() {}; 206 207 /** 208 * Register local statistics. 209 */ 210 void regStats() override; 211}; 212 213/** 214 * The compressed data is composed of multiple pattern entries. To add a new 215 * pattern one should inherit from this class and implement isPattern() and 216 * decompress. Then the new pattern must be added to the PatternFactory 217 * declaration in crescent order of size (in the CPack class). The pattern 218 * must be also added to the Name enum in the CPack::Pattern class before 219 * NUM_PATTERNS. 220 */ 221class CPack::Pattern 222{ 223 protected: 224 225 /** 226 * The patterns proposed in the paper. Each letter represents a byte: 227 * Z is a null byte, M is a dictionary match, X is a new value. 228 * These are used as indexes to reference the pattern data. If a new 229 * pattern is added, it must be done before NUM_PATTERNS. 230 */ 231 typedef enum { 232 ZZZZ, XXXX, MMMM, MMXX, ZZZX, MMMX, NUM_PATTERNS 233 } PatternNumber; 234 235 /** 236 * Pattern enum number. 237 */ 238 const PatternNumber patternNumber; 239 240 /** 241 * Code associated to the pattern. 242 */ 243 const uint8_t code; 244 245 /** 246 * Length, in bits, of the code and match location. 247 */ 248 const uint8_t length; 249 250 /** 251 * Number of unmatched bytes; 252 */ 253 const uint8_t numUnmatchedBytes; 254 255 /** 256 * Index representing the the match location. 257 */ 258 const int matchLocation; 259 260 /** 261 * Wether the pattern allocates a dictionary entry or not. 262 */ 263 const bool allocate; 264 265 /** 266 * Get code of this pattern. 267 * 268 * @return The code. 269 */ 270 uint8_t getCode() const { return code; } 271 272 public: 273 /** 274 * Default constructor. 275 * 276 * @param number Pattern number. 277 * @param code Code associated to this pattern. 278 * @param metadata_length Length, in bits, of the code and match location. 279 * @param num_unmatched_bytes Number of unmatched bytes. 280 * @param match_location Index of the match location. 281 */ 282 Pattern(const PatternNumber number, const uint64_t code, 283 const uint64_t metadata_length, const uint64_t num_unmatched_bytes, 284 const int match_location, const bool allocate = true) 285 : patternNumber(number), code(code), length(metadata_length), 286 numUnmatchedBytes(num_unmatched_bytes), 287 matchLocation(match_location), allocate(allocate) {}; 288 289 /** 290 * Default destructor. 291 */ 292 virtual ~Pattern() = default; 293 294 /** 295 * Trick function to get the number of patterns. 296 * 297 * @return The number of defined patterns. 298 */ 299 static uint64_t getNumPatterns() { return NUM_PATTERNS; }; 300 301 /** 302 * Get enum number associated to this pattern. 303 * 304 * @return The pattern enum number. 305 */ 306 PatternNumber getPatternNumber() const { return patternNumber; }; 307 308 /** 309 * Get meta-name assigned to the given pattern. 310 * 311 * @param number The number of the pattern. 312 * @return The meta-name of the pattern. 313 */ 314 static std::string getName(int number) 315 { 316 static std::map<PatternNumber, std::string> patternNames = { 317 {ZZZZ, "ZZZZ"}, {XXXX, "XXXX"}, {MMMM, "MMMM"}, 318 {MMXX, "MMXX"}, {ZZZX, "ZZZX"}, {MMMX, "MMMX"} 319 }; 320 321 return patternNames[(PatternNumber)number]; 322 }; 323 324 /** 325 * Get the index of the dictionary match location. 326 * 327 * @return The index of the match location. 328 */ 329 uint8_t getMatchLocation() const { return matchLocation; } 330 331 /** 332 * Get size, in bits, of the pattern (excluding prefix). Corresponds to 333 * unmatched_data_size + code_length. 334 * 335 * @return The size. 336 */ 337 std::size_t getSizeBits() const { 338 return numUnmatchedBytes*CHAR_BIT + length; 339 } 340 341 /** 342 * Determine if pattern allocates a dictionary entry. 343 * 344 * @return True if should allocate a dictionary entry. 345 */ 346 bool shouldAllocate() const { 347 return allocate; 348 } 349 350 std::string print() const { 351 return csprintf("pattern %s (encoding %x, size %u bits)", 352 getName(patternNumber), getCode(), getSizeBits()); 353 } 354 355 /** 356 * Decompress the pattern. Each pattern has its own way of interpreting 357 * its data. 358 * 359 * @param dict_bytes The bytes in the corresponding matching entry. 360 * @param data The decompressed pattern. 361 * @return Whether entry should be added to dictionary or not. 362 */ 363 virtual bool decompress(const std::array<uint8_t, 4> dict_bytes, 364 std::array<uint8_t, 4>& data) const = 0; 365}; 366 367class CPack::PatternZZZZ : public Pattern 368{ 369 public: 370 PatternZZZZ(const std::array<uint8_t, 4> bytes, const int match_location) 371 : Pattern(ZZZZ, 0x0, 2, 0, 0, false) {} 372 373 static bool isPattern(const std::array<uint8_t, 4>& bytes, 374 const std::array<uint8_t, 4>& dict_bytes, 375 const int match_location) 376 { 377 return (bytes[3] == 0) && (bytes[2] == 0) && (bytes[1] == 0) && 378 (bytes[0] == 0); 379 } 380 381 bool decompress(const std::array<uint8_t, 4> dict_bytes, 382 std::array<uint8_t, 4>& data) const override 383 { 384 data = {0, 0, 0, 0}; 385 return false; 386 } 387}; 388 389class CPack::PatternXXXX : public Pattern 390{ 391 private: 392 /** 393 * A copy of the word. 394 */ 395 const std::array<uint8_t, 4> bytes; 396 397 public: 398 PatternXXXX(const std::array<uint8_t, 4> bytes, const int match_location) 399 : Pattern(XXXX, 0x1, 2, 4, 0, true), bytes(bytes) {} 400 401 static bool isPattern(const std::array<uint8_t, 4>& bytes, 402 const std::array<uint8_t, 4>& dict_bytes, 403 const int match_location) 404 { 405 // It can always be an unmatch, as it is set to this class when other 406 // patterns fail 407 return true; 408 } 409 410 bool decompress(const std::array<uint8_t, 4> dict_bytes, 411 std::array<uint8_t, 4>& data) const override 412 { 413 data = bytes; 414 return true; 415 } 416}; 417 418class CPack::PatternMMMM : public Pattern 419{ 420 public: 421 PatternMMMM(const std::array<uint8_t, 4> bytes, const int match_location) 422 : Pattern(MMMM, 0x2, 6, 0, match_location, true) {} 423 424 static bool isPattern(const std::array<uint8_t, 4>& bytes, 425 const std::array<uint8_t, 4>& dict_bytes, 426 const int match_location) 427 { 428 return (bytes == dict_bytes) && (match_location >= 0); 429 } 430 431 bool decompress(const std::array<uint8_t, 4> dict_bytes, 432 std::array<uint8_t, 4>& data) const override 433 { 434 data = dict_bytes; 435 return true; 436 } 437}; 438 439class CPack::PatternMMXX : public Pattern 440{ 441 private: 442 /** 443 * A copy of the unmatched bytes. 444 */ 445 const uint8_t byte0; 446 const uint8_t byte1; 447 448 public: 449 PatternMMXX(const std::array<uint8_t, 4> bytes, const int match_location) 450 : Pattern(MMXX, 0xC, 8, 2, match_location, true), 451 byte0(bytes[0]), byte1(bytes[1]) {} 452 453 static bool isPattern(const std::array<uint8_t, 4>& bytes, 454 const std::array<uint8_t, 4>& dict_bytes, 455 const int match_location) 456 { 457 // Notice we don't compare bytes[0], as otherwise we'd be unnecessarily 458 // discarding MMXM. If that pattern is added this should be modified 459 return (bytes[3] == dict_bytes[3]) && (bytes[2] == dict_bytes[2]) && 460 (bytes[1] != dict_bytes[1]) && (match_location >= 0); 461 462 } 463 464 bool decompress(const std::array<uint8_t, 4> dict_bytes, 465 std::array<uint8_t, 4>& data) const override 466 { 467 data = {byte0, byte1, dict_bytes[2], dict_bytes[3]}; 468 return true; 469 } 470}; 471 472class CPack::PatternZZZX : public Pattern 473{ 474 private: 475 /** 476 * A copy of the unmatched byte. 477 */ 478 const uint8_t byte; 479 480 public: 481 PatternZZZX(const std::array<uint8_t, 4> bytes, const int match_location) 482 : Pattern(ZZZX, 0xD, 4, 1, 0, false), byte(bytes[0]) {} 483 484 static bool isPattern(const std::array<uint8_t, 4>& bytes, 485 const std::array<uint8_t, 4>& dict_bytes, 486 const int match_location) 487 { 488 return (bytes[3] == 0) && (bytes[2] == 0) && (bytes[1] == 0) && 489 (bytes[0] != 0); 490 } 491 492 bool decompress(const std::array<uint8_t, 4> dict_bytes, 493 std::array<uint8_t, 4>& data) const override 494 { 495 data = {byte, 0, 0, 0}; 496 return false; 497 } 498}; 499 500class CPack::PatternMMMX : public Pattern 501{ 502 private: 503 /** 504 * A copy of the unmatched byte. 505 */ 506 const uint8_t byte; 507 508 public: 509 PatternMMMX(const std::array<uint8_t, 4> bytes, const int match_location) 510 : Pattern(MMMX, 0xE, 8, 1, match_location, true), 511 byte(bytes[0]) {} 512 513 static bool isPattern(const std::array<uint8_t, 4>& bytes, 514 const std::array<uint8_t, 4>& dict_bytes, 515 const int match_location) 516 { 517 return (bytes[3] == dict_bytes[3]) && (bytes[2] == dict_bytes[2]) && 518 (bytes[1] == dict_bytes[1]) && (bytes[0] != dict_bytes[0]) && 519 (match_location >= 0); 520 } 521 522 bool decompress(const std::array<uint8_t, 4> dict_bytes, 523 std::array<uint8_t, 4>& data) const override 524 { 525 data = {byte, dict_bytes[1], dict_bytes[2], dict_bytes[3]}; 526 return true; 527 } 528}; 529 530class CPack::CompData : public CompressionData 531{ 532 public: 533 /** 534 * The patterns matched in the original line. 535 */ 536 std::vector<std::unique_ptr<Pattern>> entries; 537 538 /** 539 * Default constructor. 540 * 541 * @param dictionary_size Number of entries in the dictionary. 542 */ 543 CompData(const std::size_t dictionary_size); 544 545 /** 546 * Default destructor. 547 */ 548 ~CompData(); 549}; 550 551#endif //__MEM_CACHE_COMPRESSORS_CPACK_HH__ 552