seqdec_amd64.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. //go:build amd64 && !appengine && !noasm && gc
  2. // +build amd64,!appengine,!noasm,gc
  3. package zstd
  4. import (
  5. "fmt"
  6. "io"
  7. "github.com/klauspost/compress/internal/cpuinfo"
  8. )
  9. type decodeSyncAsmContext struct {
  10. llTable []decSymbol
  11. mlTable []decSymbol
  12. ofTable []decSymbol
  13. llState uint64
  14. mlState uint64
  15. ofState uint64
  16. iteration int
  17. litRemain int
  18. out []byte
  19. outPosition int
  20. literals []byte
  21. litPosition int
  22. history []byte
  23. windowSize int
  24. ll int // set on error (not for all errors, please refer to _generate/gen.go)
  25. ml int // set on error (not for all errors, please refer to _generate/gen.go)
  26. mo int // set on error (not for all errors, please refer to _generate/gen.go)
  27. }
  28. // sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
  29. //
  30. // Please refer to seqdec_generic.go for the reference implementation.
  31. //
  32. //go:noescape
  33. func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
  34. // sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
  35. //
  36. //go:noescape
  37. func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
  38. // sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer.
  39. //
  40. //go:noescape
  41. func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
  42. // sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer.
  43. //
  44. //go:noescape
  45. func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
  46. // decode sequences from the stream with the provided history but without a dictionary.
  47. func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
  48. if len(s.dict) > 0 {
  49. return false, nil
  50. }
  51. if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize {
  52. return false, nil
  53. }
  54. // FIXME: Using unsafe memory copies leads to rare, random crashes
  55. // with fuzz testing. It is therefore disabled for now.
  56. const useSafe = true
  57. /*
  58. useSafe := false
  59. if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
  60. useSafe = true
  61. }
  62. if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
  63. useSafe = true
  64. }
  65. if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
  66. useSafe = true
  67. }
  68. */
  69. br := s.br
  70. maxBlockSize := maxCompressedBlockSize
  71. if s.windowSize < maxBlockSize {
  72. maxBlockSize = s.windowSize
  73. }
  74. ctx := decodeSyncAsmContext{
  75. llTable: s.litLengths.fse.dt[:maxTablesize],
  76. mlTable: s.matchLengths.fse.dt[:maxTablesize],
  77. ofTable: s.offsets.fse.dt[:maxTablesize],
  78. llState: uint64(s.litLengths.state.state),
  79. mlState: uint64(s.matchLengths.state.state),
  80. ofState: uint64(s.offsets.state.state),
  81. iteration: s.nSeqs - 1,
  82. litRemain: len(s.literals),
  83. out: s.out,
  84. outPosition: len(s.out),
  85. literals: s.literals,
  86. windowSize: s.windowSize,
  87. history: hist,
  88. }
  89. s.seqSize = 0
  90. startSize := len(s.out)
  91. var errCode int
  92. if cpuinfo.HasBMI2() {
  93. if useSafe {
  94. errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx)
  95. } else {
  96. errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx)
  97. }
  98. } else {
  99. if useSafe {
  100. errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx)
  101. } else {
  102. errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx)
  103. }
  104. }
  105. switch errCode {
  106. case noError:
  107. break
  108. case errorMatchLenOfsMismatch:
  109. return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml)
  110. case errorMatchLenTooBig:
  111. return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml)
  112. case errorMatchOffTooBig:
  113. return true, fmt.Errorf("match offset (%d) bigger than current history (%d)",
  114. ctx.mo, ctx.outPosition+len(hist)-startSize)
  115. case errorNotEnoughLiterals:
  116. return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
  117. ctx.ll, ctx.litRemain+ctx.ll)
  118. case errorOverread:
  119. return true, io.ErrUnexpectedEOF
  120. case errorNotEnoughSpace:
  121. size := ctx.outPosition + ctx.ll + ctx.ml
  122. if debugDecoder {
  123. println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
  124. }
  125. return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
  126. default:
  127. return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode)
  128. }
  129. s.seqSize += ctx.litRemain
  130. if s.seqSize > maxBlockSize {
  131. return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
  132. }
  133. err := br.close()
  134. if err != nil {
  135. printf("Closing sequences: %v, %+v\n", err, *br)
  136. return true, err
  137. }
  138. s.literals = s.literals[ctx.litPosition:]
  139. t := ctx.outPosition
  140. s.out = s.out[:t]
  141. // Add final literals
  142. s.out = append(s.out, s.literals...)
  143. if debugDecoder {
  144. t += len(s.literals)
  145. if t != len(s.out) {
  146. panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t))
  147. }
  148. }
  149. return true, nil
  150. }
  151. // --------------------------------------------------------------------------------
  152. type decodeAsmContext struct {
  153. llTable []decSymbol
  154. mlTable []decSymbol
  155. ofTable []decSymbol
  156. llState uint64
  157. mlState uint64
  158. ofState uint64
  159. iteration int
  160. seqs []seqVals
  161. litRemain int
  162. }
  163. const noError = 0
  164. // error reported when mo == 0 && ml > 0
  165. const errorMatchLenOfsMismatch = 1
  166. // error reported when ml > maxMatchLen
  167. const errorMatchLenTooBig = 2
  168. // error reported when mo > available history or mo > s.windowSize
  169. const errorMatchOffTooBig = 3
  170. // error reported when the sum of literal lengths exeeceds the literal buffer size
  171. const errorNotEnoughLiterals = 4
  172. // error reported when capacity of `out` is too small
  173. const errorNotEnoughSpace = 5
  174. // error reported when bits are overread.
  175. const errorOverread = 6
  176. // sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
  177. //
  178. // Please refer to seqdec_generic.go for the reference implementation.
  179. //
  180. //go:noescape
  181. func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
  182. // sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
  183. //
  184. // Please refer to seqdec_generic.go for the reference implementation.
  185. //
  186. //go:noescape
  187. func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
  188. // sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
  189. //
  190. //go:noescape
  191. func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
  192. // sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
  193. //
  194. //go:noescape
  195. func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
  196. // decode sequences from the stream without the provided history.
  197. func (s *sequenceDecs) decode(seqs []seqVals) error {
  198. br := s.br
  199. maxBlockSize := maxCompressedBlockSize
  200. if s.windowSize < maxBlockSize {
  201. maxBlockSize = s.windowSize
  202. }
  203. ctx := decodeAsmContext{
  204. llTable: s.litLengths.fse.dt[:maxTablesize],
  205. mlTable: s.matchLengths.fse.dt[:maxTablesize],
  206. ofTable: s.offsets.fse.dt[:maxTablesize],
  207. llState: uint64(s.litLengths.state.state),
  208. mlState: uint64(s.matchLengths.state.state),
  209. ofState: uint64(s.offsets.state.state),
  210. seqs: seqs,
  211. iteration: len(seqs) - 1,
  212. litRemain: len(s.literals),
  213. }
  214. if debugDecoder {
  215. println("decode: decoding", len(seqs), "sequences", br.remain(), "bits remain on stream")
  216. }
  217. s.seqSize = 0
  218. lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
  219. var errCode int
  220. if cpuinfo.HasBMI2() {
  221. if lte56bits {
  222. errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx)
  223. } else {
  224. errCode = sequenceDecs_decode_bmi2(s, br, &ctx)
  225. }
  226. } else {
  227. if lte56bits {
  228. errCode = sequenceDecs_decode_56_amd64(s, br, &ctx)
  229. } else {
  230. errCode = sequenceDecs_decode_amd64(s, br, &ctx)
  231. }
  232. }
  233. if errCode != 0 {
  234. i := len(seqs) - ctx.iteration - 1
  235. switch errCode {
  236. case errorMatchLenOfsMismatch:
  237. ml := ctx.seqs[i].ml
  238. return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
  239. case errorMatchLenTooBig:
  240. ml := ctx.seqs[i].ml
  241. return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
  242. case errorNotEnoughLiterals:
  243. ll := ctx.seqs[i].ll
  244. return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
  245. case errorOverread:
  246. return io.ErrUnexpectedEOF
  247. }
  248. return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
  249. }
  250. if ctx.litRemain < 0 {
  251. return fmt.Errorf("literal count is too big: total available %d, total requested %d",
  252. len(s.literals), len(s.literals)-ctx.litRemain)
  253. }
  254. s.seqSize += ctx.litRemain
  255. if s.seqSize > maxBlockSize {
  256. return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
  257. }
  258. if debugDecoder {
  259. println("decode: ", br.remain(), "bits remain on stream. code:", errCode)
  260. }
  261. err := br.close()
  262. if err != nil {
  263. printf("Closing sequences: %v, %+v\n", err, *br)
  264. }
  265. return err
  266. }
  267. // --------------------------------------------------------------------------------
  268. type executeAsmContext struct {
  269. seqs []seqVals
  270. seqIndex int
  271. out []byte
  272. history []byte
  273. literals []byte
  274. outPosition int
  275. litPosition int
  276. windowSize int
  277. }
  278. // sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm.
  279. //
  280. // Returns false if a match offset is too big.
  281. //
  282. // Please refer to seqdec_generic.go for the reference implementation.
  283. //
  284. //go:noescape
  285. func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
  286. // Same as above, but with safe memcopies
  287. //
  288. //go:noescape
  289. func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
  290. // executeSimple handles cases when dictionary is not used.
  291. func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
  292. // Ensure we have enough output size...
  293. if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) {
  294. addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc
  295. s.out = append(s.out, make([]byte, addBytes)...)
  296. s.out = s.out[:len(s.out)-addBytes]
  297. }
  298. if debugDecoder {
  299. printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
  300. }
  301. var t = len(s.out)
  302. out := s.out[:t+s.seqSize]
  303. ctx := executeAsmContext{
  304. seqs: seqs,
  305. seqIndex: 0,
  306. out: out,
  307. history: hist,
  308. outPosition: t,
  309. litPosition: 0,
  310. literals: s.literals,
  311. windowSize: s.windowSize,
  312. }
  313. var ok bool
  314. if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
  315. ok = sequenceDecs_executeSimple_safe_amd64(&ctx)
  316. } else {
  317. ok = sequenceDecs_executeSimple_amd64(&ctx)
  318. }
  319. if !ok {
  320. return fmt.Errorf("match offset (%d) bigger than current history (%d)",
  321. seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist))
  322. }
  323. s.literals = s.literals[ctx.litPosition:]
  324. t = ctx.outPosition
  325. // Add final literals
  326. copy(out[t:], s.literals)
  327. if debugDecoder {
  328. t += len(s.literals)
  329. if t != len(out) {
  330. panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
  331. }
  332. }
  333. s.out = out
  334. return nil
  335. }