fse_predefined.go 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. // Copyright 2019+ Klaus Post. All rights reserved.
  2. // License information can be found in the LICENSE file.
  3. // Based on work by Yann Collet, released under BSD License.
  4. package zstd
  5. import (
  6. "fmt"
  7. "math"
  8. "sync"
  9. )
  10. var (
  11. // fsePredef are the predefined fse tables as defined here:
  12. // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#default-distributions
  13. // These values are already transformed.
  14. fsePredef [3]fseDecoder
  15. // fsePredefEnc are the predefined encoder based on fse tables as defined here:
  16. // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#default-distributions
  17. // These values are already transformed.
  18. fsePredefEnc [3]fseEncoder
  19. // symbolTableX contain the transformations needed for each type as defined in
  20. // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#the-codes-for-literals-lengths-match-lengths-and-offsets
  21. symbolTableX [3][]baseOffset
  22. // maxTableSymbol is the biggest supported symbol for each table type
  23. // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#the-codes-for-literals-lengths-match-lengths-and-offsets
  24. maxTableSymbol = [3]uint8{tableLiteralLengths: maxLiteralLengthSymbol, tableOffsets: maxOffsetLengthSymbol, tableMatchLengths: maxMatchLengthSymbol}
  25. // bitTables is the bits table for each table.
  26. bitTables = [3][]byte{tableLiteralLengths: llBitsTable[:], tableOffsets: nil, tableMatchLengths: mlBitsTable[:]}
  27. )
  28. type tableIndex uint8
  29. const (
  30. // indexes for fsePredef and symbolTableX
  31. tableLiteralLengths tableIndex = 0
  32. tableOffsets tableIndex = 1
  33. tableMatchLengths tableIndex = 2
  34. maxLiteralLengthSymbol = 35
  35. maxOffsetLengthSymbol = 30
  36. maxMatchLengthSymbol = 52
  37. )
  38. // baseOffset is used for calculating transformations.
  39. type baseOffset struct {
  40. baseLine uint32
  41. addBits uint8
  42. }
  43. // fillBase will precalculate base offsets with the given bit distributions.
  44. func fillBase(dst []baseOffset, base uint32, bits ...uint8) {
  45. if len(bits) != len(dst) {
  46. panic(fmt.Sprintf("len(dst) (%d) != len(bits) (%d)", len(dst), len(bits)))
  47. }
  48. for i, bit := range bits {
  49. if base > math.MaxInt32 {
  50. panic("invalid decoding table, base overflows int32")
  51. }
  52. dst[i] = baseOffset{
  53. baseLine: base,
  54. addBits: bit,
  55. }
  56. base += 1 << bit
  57. }
  58. }
  59. var predef sync.Once
  60. func initPredefined() {
  61. predef.Do(func() {
  62. // Literals length codes
  63. tmp := make([]baseOffset, 36)
  64. for i := range tmp[:16] {
  65. tmp[i] = baseOffset{
  66. baseLine: uint32(i),
  67. addBits: 0,
  68. }
  69. }
  70. fillBase(tmp[16:], 16, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
  71. symbolTableX[tableLiteralLengths] = tmp
  72. // Match length codes
  73. tmp = make([]baseOffset, 53)
  74. for i := range tmp[:32] {
  75. tmp[i] = baseOffset{
  76. // The transformation adds the 3 length.
  77. baseLine: uint32(i) + 3,
  78. addBits: 0,
  79. }
  80. }
  81. fillBase(tmp[32:], 35, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
  82. symbolTableX[tableMatchLengths] = tmp
  83. // Offset codes
  84. tmp = make([]baseOffset, maxOffsetBits+1)
  85. tmp[1] = baseOffset{
  86. baseLine: 1,
  87. addBits: 1,
  88. }
  89. fillBase(tmp[2:], 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)
  90. symbolTableX[tableOffsets] = tmp
  91. // Fill predefined tables and transform them.
  92. // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#default-distributions
  93. for i := range fsePredef[:] {
  94. f := &fsePredef[i]
  95. switch tableIndex(i) {
  96. case tableLiteralLengths:
  97. // https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L243
  98. f.actualTableLog = 6
  99. copy(f.norm[:], []int16{4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
  100. 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
  101. -1, -1, -1, -1})
  102. f.symbolLen = 36
  103. case tableOffsets:
  104. // https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L281
  105. f.actualTableLog = 5
  106. copy(f.norm[:], []int16{
  107. 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
  108. 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1})
  109. f.symbolLen = 29
  110. case tableMatchLengths:
  111. //https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L304
  112. f.actualTableLog = 6
  113. copy(f.norm[:], []int16{
  114. 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
  115. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  116. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1,
  117. -1, -1, -1, -1, -1})
  118. f.symbolLen = 53
  119. }
  120. if err := f.buildDtable(); err != nil {
  121. panic(fmt.Errorf("building table %v: %v", tableIndex(i), err))
  122. }
  123. if err := f.transform(symbolTableX[i]); err != nil {
  124. panic(fmt.Errorf("building table %v: %v", tableIndex(i), err))
  125. }
  126. f.preDefined = true
  127. // Create encoder as well
  128. enc := &fsePredefEnc[i]
  129. copy(enc.norm[:], f.norm[:])
  130. enc.symbolLen = f.symbolLen
  131. enc.actualTableLog = f.actualTableLog
  132. if err := enc.buildCTable(); err != nil {
  133. panic(fmt.Errorf("building encoding table %v: %v", tableIndex(i), err))
  134. }
  135. enc.setBits(bitTables[i])
  136. enc.preDefined = true
  137. }
  138. })
  139. }