decodeheader.go 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. // Copyright 2020+ Klaus Post. All rights reserved.
  2. // License information can be found in the LICENSE file.
  3. package zstd
  4. import (
  5. "encoding/binary"
  6. "errors"
  7. "io"
  8. )
  9. // HeaderMaxSize is the maximum size of a Frame and Block Header.
  10. // If less is sent to Header.Decode it *may* still contain enough information.
  11. const HeaderMaxSize = 14 + 3
// Header contains information about the first frame and block within that.
// All fields are reset to their zero value at the start of every
// Decode / DecodeAndStrip call.
type Header struct {
	// SingleSegment specifies whether the data is to be decompressed into a
	// single contiguous memory segment.
	// It implies that WindowSize is invalid and that FrameContentSize is valid.
	SingleSegment bool

	// WindowSize is the window of data to keep while decoding.
	// Will only be set if SingleSegment is false.
	WindowSize uint64

	// Dictionary ID.
	// If 0, no dictionary.
	DictionaryID uint32

	// HasFCS specifies whether FrameContentSize has a valid value.
	HasFCS bool

	// FrameContentSize is the expected uncompressed size of the entire frame.
	// Only meaningful when HasFCS is true.
	FrameContentSize uint64

	// Skippable will be true if the frame is meant to be skipped.
	// This implies that FirstBlock.OK is false.
	Skippable bool

	// SkippableID is the user-specific ID for the skippable frame.
	// It is taken from the low 4 bits of the first magic byte, so
	// valid values are between 0 to 15, inclusive.
	SkippableID int

	// SkippableSize is the length of the user data to skip following
	// the header.
	SkippableSize uint32

	// HeaderSize is the raw size of the frame header.
	//
	// For normal frames, it includes the size of the magic number and
	// the size of the header (per section 3.1.1.1).
	// It does not include the size for any data blocks (section 3.1.1.2) nor
	// the size for the trailing content checksum.
	//
	// For skippable frames, this counts the size of the magic number
	// along with the size of the size field of the payload.
	// It does not include the size of the skippable payload itself.
	// The total frame size is the HeaderSize plus the SkippableSize.
	HeaderSize int

	// First block information.
	// Only populated for non-skippable frames when at least 3 bytes
	// (one block header) follow the frame header.
	FirstBlock struct {
		// OK will be set if first block could be decoded.
		// It stays false when the block header is missing from the input
		// or when the block type is the reserved one.
		OK bool

		// Is this the last block of a frame?
		Last bool

		// Is the data compressed?
		// If true CompressedSize will be populated.
		// Unfortunately DecompressedSize cannot be determined
		// without decoding the blocks.
		// RLE blocks are also reported as compressed; for those
		// CompressedSize is 1 and DecompressedSize is known.
		Compressed bool

		// DecompressedSize is the expected decompressed size of the block.
		// Will be 0 if it cannot be determined.
		DecompressedSize int

		// CompressedSize of the data in the block.
		// Does not include the block header.
		// Will be equal to DecompressedSize if not Compressed.
		CompressedSize int
	}

	// HasCheckSum mirrors the checksum flag (bit 2) of the frame header
	// descriptor. If set there is a checksum present for the content.
	// The checksum field at the end is always 4 bytes long.
	HasCheckSum bool
}
  72. // Decode the header from the beginning of the stream.
  73. // This will decode the frame header and the first block header if enough bytes are provided.
  74. // It is recommended to provide at least HeaderMaxSize bytes.
  75. // If the frame header cannot be read an error will be returned.
  76. // If there isn't enough input, io.ErrUnexpectedEOF is returned.
  77. // The FirstBlock.OK will indicate if enough information was available to decode the first block header.
  78. func (h *Header) Decode(in []byte) error {
  79. _, err := h.DecodeAndStrip(in)
  80. return err
  81. }
  82. // DecodeAndStrip will decode the header from the beginning of the stream
  83. // and on success return the remaining bytes.
  84. // This will decode the frame header and the first block header if enough bytes are provided.
  85. // It is recommended to provide at least HeaderMaxSize bytes.
  86. // If the frame header cannot be read an error will be returned.
  87. // If there isn't enough input, io.ErrUnexpectedEOF is returned.
  88. // The FirstBlock.OK will indicate if enough information was available to decode the first block header.
  89. func (h *Header) DecodeAndStrip(in []byte) (remain []byte, err error) {
  90. *h = Header{}
  91. if len(in) < 4 {
  92. return nil, io.ErrUnexpectedEOF
  93. }
  94. h.HeaderSize += 4
  95. b, in := in[:4], in[4:]
  96. if string(b) != frameMagic {
  97. if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
  98. return nil, ErrMagicMismatch
  99. }
  100. if len(in) < 4 {
  101. return nil, io.ErrUnexpectedEOF
  102. }
  103. h.HeaderSize += 4
  104. h.Skippable = true
  105. h.SkippableID = int(b[0] & 0xf)
  106. h.SkippableSize = binary.LittleEndian.Uint32(in)
  107. return in[4:], nil
  108. }
  109. // Read Window_Descriptor
  110. // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
  111. if len(in) < 1 {
  112. return nil, io.ErrUnexpectedEOF
  113. }
  114. fhd, in := in[0], in[1:]
  115. h.HeaderSize++
  116. h.SingleSegment = fhd&(1<<5) != 0
  117. h.HasCheckSum = fhd&(1<<2) != 0
  118. if fhd&(1<<3) != 0 {
  119. return nil, errors.New("reserved bit set on frame header")
  120. }
  121. if !h.SingleSegment {
  122. if len(in) < 1 {
  123. return nil, io.ErrUnexpectedEOF
  124. }
  125. var wd byte
  126. wd, in = in[0], in[1:]
  127. h.HeaderSize++
  128. windowLog := 10 + (wd >> 3)
  129. windowBase := uint64(1) << windowLog
  130. windowAdd := (windowBase / 8) * uint64(wd&0x7)
  131. h.WindowSize = windowBase + windowAdd
  132. }
  133. // Read Dictionary_ID
  134. // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
  135. if size := fhd & 3; size != 0 {
  136. if size == 3 {
  137. size = 4
  138. }
  139. if len(in) < int(size) {
  140. return nil, io.ErrUnexpectedEOF
  141. }
  142. b, in = in[:size], in[size:]
  143. h.HeaderSize += int(size)
  144. switch len(b) {
  145. case 1:
  146. h.DictionaryID = uint32(b[0])
  147. case 2:
  148. h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8)
  149. case 4:
  150. h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
  151. }
  152. }
  153. // Read Frame_Content_Size
  154. // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size
  155. var fcsSize int
  156. v := fhd >> 6
  157. switch v {
  158. case 0:
  159. if h.SingleSegment {
  160. fcsSize = 1
  161. }
  162. default:
  163. fcsSize = 1 << v
  164. }
  165. if fcsSize > 0 {
  166. h.HasFCS = true
  167. if len(in) < fcsSize {
  168. return nil, io.ErrUnexpectedEOF
  169. }
  170. b, in = in[:fcsSize], in[fcsSize:]
  171. h.HeaderSize += int(fcsSize)
  172. switch len(b) {
  173. case 1:
  174. h.FrameContentSize = uint64(b[0])
  175. case 2:
  176. // When FCS_Field_Size is 2, the offset of 256 is added.
  177. h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256
  178. case 4:
  179. h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24)
  180. case 8:
  181. d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
  182. d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
  183. h.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
  184. }
  185. }
  186. // Frame Header done, we will not fail from now on.
  187. if len(in) < 3 {
  188. return in, nil
  189. }
  190. tmp := in[:3]
  191. bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
  192. h.FirstBlock.Last = bh&1 != 0
  193. blockType := blockType((bh >> 1) & 3)
  194. // find size.
  195. cSize := int(bh >> 3)
  196. switch blockType {
  197. case blockTypeReserved:
  198. return in, nil
  199. case blockTypeRLE:
  200. h.FirstBlock.Compressed = true
  201. h.FirstBlock.DecompressedSize = cSize
  202. h.FirstBlock.CompressedSize = 1
  203. case blockTypeCompressed:
  204. h.FirstBlock.Compressed = true
  205. h.FirstBlock.CompressedSize = cSize
  206. case blockTypeRaw:
  207. h.FirstBlock.DecompressedSize = cSize
  208. h.FirstBlock.CompressedSize = cSize
  209. default:
  210. panic("Invalid block type")
  211. }
  212. h.FirstBlock.OK = true
  213. return in, nil
  214. }
  215. // AppendTo will append the encoded header to the dst slice.
  216. // There is no error checking performed on the header values.
  217. func (h *Header) AppendTo(dst []byte) ([]byte, error) {
  218. if h.Skippable {
  219. magic := [4]byte{0x50, 0x2a, 0x4d, 0x18}
  220. magic[0] |= byte(h.SkippableID & 0xf)
  221. dst = append(dst, magic[:]...)
  222. f := h.SkippableSize
  223. return append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)), nil
  224. }
  225. f := frameHeader{
  226. ContentSize: h.FrameContentSize,
  227. WindowSize: uint32(h.WindowSize),
  228. SingleSegment: h.SingleSegment,
  229. Checksum: h.HasCheckSum,
  230. DictID: h.DictionaryID,
  231. }
  232. return f.appendTo(dst), nil
  233. }