enc_base.go

package zstd

import (
	"fmt"
	"math/bits"

	"github.com/klauspost/compress/zstd/internal/xxhash"
)

const (
	dictShardBits = 6
)

type fastBase struct {
	// cur is the offset at the start of hist
	cur int32
	// maximum offset. Should be at least 2x block size.
	maxMatchOff int32
	bufferReset int32
	hist        []byte
	crc         *xxhash.Digest
	tmp         [8]byte
	blk         *blockEnc
	lastDictID  uint32
	lowMem      bool
}

// CRC returns the underlying CRC writer.
func (e *fastBase) CRC() *xxhash.Digest {
	return e.crc
}

// AppendCRC will append the CRC to the destination slice and return it.
func (e *fastBase) AppendCRC(dst []byte) []byte {
	// Sum appends the 64-bit hash in big-endian order; the frame checksum is
	// the low 32 bits stored little-endian, hence the reversed indexing.
	crc := e.crc.Sum(e.tmp[:0])
	dst = append(dst, crc[7], crc[6], crc[5], crc[4])
	return dst
}

// WindowSize returns the window size of the encoder,
// or a window size small enough to contain the input size, if > 0.
func (e *fastBase) WindowSize(size int64) int32 {
	if size > 0 && size < int64(e.maxMatchOff) {
		b := int32(1) << uint(bits.Len(uint(size)))
		// Keep minimum window.
		if b < 1024 {
			b = 1024
		}
		return b
	}
	return e.maxMatchOff
}

// Block returns the current block.
func (e *fastBase) Block() *blockEnc {
	return e.blk
}
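
// addBlock appends src to the history buffer and returns the offset at which
// src starts within e.hist. When the buffer is full, the window is first slid
// down so that only the last maxMatchOff bytes are kept, and e.cur is advanced
// by the amount removed.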
func (e *fastBase) addBlock(src []byte) int32 {
	if debugAsserts && e.cur > e.bufferReset {
		panic(fmt.Sprintf("e.cur (%d) > buffer reset (%d)", e.cur, e.bufferReset))
	}
	// check if we have space already
	if len(e.hist)+len(src) > cap(e.hist) {
		if cap(e.hist) == 0 {
			e.ensureHist(len(src))
		} else {
			if cap(e.hist) < int(e.maxMatchOff+maxCompressedBlockSize) {
				panic(fmt.Errorf("unexpected buffer cap %d, want at least %d with window %d", cap(e.hist), e.maxMatchOff+maxCompressedBlockSize, e.maxMatchOff))
			}
			// Move down
			offset := int32(len(e.hist)) - e.maxMatchOff
			copy(e.hist[0:e.maxMatchOff], e.hist[offset:])
			e.cur += offset
			e.hist = e.hist[:e.maxMatchOff]
		}
	}
	s := int32(len(e.hist))
	e.hist = append(e.hist, src...)
	return s
}

// ensureHist will ensure that history can keep at least this many bytes.
func (e *fastBase) ensureHist(n int) {
	if cap(e.hist) >= n {
		return
	}
	l := e.maxMatchOff
	if (e.lowMem && e.maxMatchOff > maxCompressedBlockSize) || e.maxMatchOff <= maxCompressedBlockSize {
		l += maxCompressedBlockSize
	} else {
		l += e.maxMatchOff
	}
	// Make it at least 1MB.
	if l < 1<<20 && !e.lowMem {
		l = 1 << 20
	}
	// Make it at least the requested size.
	if l < int32(n) {
		l = int32(n)
	}
	e.hist = make([]byte, 0, l)
}

// UseBlock will replace the block with the provided one,
// but transfer recent offsets from the previous.
func (e *fastBase) UseBlock(enc *blockEnc) {
	enc.reset(e.blk)
	e.blk = enc
}
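
// matchlen returns the number of matching bytes between src[s:] and src[t:].
// With debugAsserts enabled it first checks that s and t are non-negative,
// that they are within maxMatchOff of each other, and that no more than
// maxCompressedBlockSize bytes follow s.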
func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
	if debugAsserts {
		if s < 0 {
			err := fmt.Sprintf("s (%d) < 0", s)
			panic(err)
		}
		if t < 0 {
			err := fmt.Sprintf("t (%d) < 0", t)
			panic(err)
		}
		if s-t > e.maxMatchOff {
			err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff)
			panic(err)
		}
		if len(src)-int(s) > maxCompressedBlockSize {
			panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
		}
	}
	return int32(matchLen(src[s:], src[t:]))
}

// Reset the encoding table.
func (e *fastBase) resetBase(d *dict, singleBlock bool) {
	if e.blk == nil {
		e.blk = &blockEnc{lowMem: e.lowMem}
		e.blk.init()
	} else {
		e.blk.reset(nil)
	}
	e.blk.initNewEncode()
	if e.crc == nil {
		e.crc = xxhash.New()
	} else {
		e.crc.Reset()
	}
	e.blk.dictLitEnc = nil
	if d != nil {
		low := e.lowMem
		if singleBlock {
			e.lowMem = true
		}
		e.ensureHist(d.ContentSize() + maxCompressedBlockSize)
		e.lowMem = low
	}

	// We offset current position so everything will be out of reach.
	// If above reset line, history will be purged.
	if e.cur < e.bufferReset {
		e.cur += e.maxMatchOff + int32(len(e.hist))
	}
	e.hist = e.hist[:0]
	if d != nil {
		// Set offsets (currently not used)
		for i, off := range d.offsets {
			e.blk.recentOffsets[i] = uint32(off)
			e.blk.prevRecentOffsets[i] = e.blk.recentOffsets[i]
		}
		// Transfer litenc.
		e.blk.dictLitEnc = d.litEnc
		e.hist = append(e.hist, d.content...)
	}
}
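
// The sketch below is not part of the upstream file. It is an illustrative,
// assumption-labeled example of how the concrete encoders in this package
// (for example enc_fast.go) drive the helpers above: input is appended to the
// shared history with addBlock, and candidate matches are verified with
// matchlen. The type sketchEncoder and its method are hypothetical names.
type sketchEncoder struct {
	fastBase
}

func (e *sketchEncoder) encodeSketch(src []byte) {
	// s is the offset of src within e.hist after appending.
	s := e.addBlock(src)

	// A real encoder hashes positions from s onward, looks up earlier
	// candidates in its table, and extends them. As a stand-in, compare the
	// block start against the beginning of the current window.
	if s > 0 && s <= e.maxMatchOff {
		_ = e.matchlen(s, 0, e.hist)
	}
}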