decompress.go 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167
  1. package huff0
  2. import (
  3. "errors"
  4. "fmt"
  5. "io"
  6. "sync"
  7. "github.com/klauspost/compress/fse"
  8. )
  9. type dTable struct {
  10. single []dEntrySingle
  11. }
  12. // single-symbols decoding
  13. type dEntrySingle struct {
  14. entry uint16
  15. }
  16. // Uses special code for all tables that are < 8 bits.
  17. const use8BitTables = true
  18. // ReadTable will read a table from the input.
  19. // The size of the input may be larger than the table definition.
  20. // Any content remaining after the table definition will be returned.
  21. // If no Scratch is provided a new one is allocated.
  22. // The returned Scratch can be used for encoding or decoding input using this table.
  23. func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
  24. s, err = s.prepare(nil)
  25. if err != nil {
  26. return s, nil, err
  27. }
  28. if len(in) <= 1 {
  29. return s, nil, errors.New("input too small for table")
  30. }
  31. iSize := in[0]
  32. in = in[1:]
  33. if iSize >= 128 {
  34. // Uncompressed
  35. oSize := iSize - 127
  36. iSize = (oSize + 1) / 2
  37. if int(iSize) > len(in) {
  38. return s, nil, errors.New("input too small for table")
  39. }
  40. for n := uint8(0); n < oSize; n += 2 {
  41. v := in[n/2]
  42. s.huffWeight[n] = v >> 4
  43. s.huffWeight[n+1] = v & 15
  44. }
  45. s.symbolLen = uint16(oSize)
  46. in = in[iSize:]
  47. } else {
  48. if len(in) < int(iSize) {
  49. return s, nil, fmt.Errorf("input too small for table, want %d bytes, have %d", iSize, len(in))
  50. }
  51. // FSE compressed weights
  52. s.fse.DecompressLimit = 255
  53. hw := s.huffWeight[:]
  54. s.fse.Out = hw
  55. b, err := fse.Decompress(in[:iSize], s.fse)
  56. s.fse.Out = nil
  57. if err != nil {
  58. return s, nil, fmt.Errorf("fse decompress returned: %w", err)
  59. }
  60. if len(b) > 255 {
  61. return s, nil, errors.New("corrupt input: output table too large")
  62. }
  63. s.symbolLen = uint16(len(b))
  64. in = in[iSize:]
  65. }
  66. // collect weight stats
  67. var rankStats [16]uint32
  68. weightTotal := uint32(0)
  69. for _, v := range s.huffWeight[:s.symbolLen] {
  70. if v > tableLogMax {
  71. return s, nil, errors.New("corrupt input: weight too large")
  72. }
  73. v2 := v & 15
  74. rankStats[v2]++
  75. // (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0.
  76. weightTotal += (1 << v2) >> 1
  77. }
  78. if weightTotal == 0 {
  79. return s, nil, errors.New("corrupt input: weights zero")
  80. }
  81. // get last non-null symbol weight (implied, total must be 2^n)
  82. {
  83. tableLog := highBit32(weightTotal) + 1
  84. if tableLog > tableLogMax {
  85. return s, nil, errors.New("corrupt input: tableLog too big")
  86. }
  87. s.actualTableLog = uint8(tableLog)
  88. // determine last weight
  89. {
  90. total := uint32(1) << tableLog
  91. rest := total - weightTotal
  92. verif := uint32(1) << highBit32(rest)
  93. lastWeight := highBit32(rest) + 1
  94. if verif != rest {
  95. // last value must be a clean power of 2
  96. return s, nil, errors.New("corrupt input: last value not power of two")
  97. }
  98. s.huffWeight[s.symbolLen] = uint8(lastWeight)
  99. s.symbolLen++
  100. rankStats[lastWeight]++
  101. }
  102. }
  103. if (rankStats[1] < 2) || (rankStats[1]&1 != 0) {
  104. // by construction : at least 2 elts of rank 1, must be even
  105. return s, nil, errors.New("corrupt input: min elt size, even check failed ")
  106. }
  107. // TODO: Choose between single/double symbol decoding
  108. // Calculate starting value for each rank
  109. {
  110. var nextRankStart uint32
  111. for n := uint8(1); n < s.actualTableLog+1; n++ {
  112. current := nextRankStart
  113. nextRankStart += rankStats[n] << (n - 1)
  114. rankStats[n] = current
  115. }
  116. }
  117. // fill DTable (always full size)
  118. tSize := 1 << tableLogMax
  119. if len(s.dt.single) != tSize {
  120. s.dt.single = make([]dEntrySingle, tSize)
  121. }
  122. cTable := s.prevTable
  123. if cap(cTable) < maxSymbolValue+1 {
  124. cTable = make([]cTableEntry, 0, maxSymbolValue+1)
  125. }
  126. cTable = cTable[:maxSymbolValue+1]
  127. s.prevTable = cTable[:s.symbolLen]
  128. s.prevTableLog = s.actualTableLog
  129. for n, w := range s.huffWeight[:s.symbolLen] {
  130. if w == 0 {
  131. cTable[n] = cTableEntry{
  132. val: 0,
  133. nBits: 0,
  134. }
  135. continue
  136. }
  137. length := (uint32(1) << w) >> 1
  138. d := dEntrySingle{
  139. entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8),
  140. }
  141. rank := &rankStats[w]
  142. cTable[n] = cTableEntry{
  143. val: uint16(*rank >> (w - 1)),
  144. nBits: uint8(d.entry),
  145. }
  146. single := s.dt.single[*rank : *rank+length]
  147. for i := range single {
  148. single[i] = d
  149. }
  150. *rank += length
  151. }
  152. return s, in, nil
  153. }
  154. // Decompress1X will decompress a 1X encoded stream.
  155. // The length of the supplied input must match the end of a block exactly.
  156. // Before this is called, the table must be initialized with ReadTable unless
  157. // the encoder re-used the table.
  158. // deprecated: Use the stateless Decoder() to get a concurrent version.
  159. func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) {
  160. if cap(s.Out) < s.MaxDecodedSize {
  161. s.Out = make([]byte, s.MaxDecodedSize)
  162. }
  163. s.Out = s.Out[:0:s.MaxDecodedSize]
  164. s.Out, err = s.Decoder().Decompress1X(s.Out, in)
  165. return s.Out, err
  166. }
  167. // Decompress4X will decompress a 4X encoded stream.
  168. // Before this is called, the table must be initialized with ReadTable unless
  169. // the encoder re-used the table.
  170. // The length of the supplied input must match the end of a block exactly.
  171. // The destination size of the uncompressed data must be known and provided.
  172. // deprecated: Use the stateless Decoder() to get a concurrent version.
  173. func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
  174. if dstSize > s.MaxDecodedSize {
  175. return nil, ErrMaxDecodedSizeExceeded
  176. }
  177. if cap(s.Out) < dstSize {
  178. s.Out = make([]byte, s.MaxDecodedSize)
  179. }
  180. s.Out = s.Out[:0:dstSize]
  181. s.Out, err = s.Decoder().Decompress4X(s.Out, in)
  182. return s.Out, err
  183. }
  184. // Decoder will return a stateless decoder that can be used by multiple
  185. // decompressors concurrently.
  186. // Before this is called, the table must be initialized with ReadTable.
  187. // The Decoder is still linked to the scratch buffer so that cannot be reused.
  188. // However, it is safe to discard the scratch.
  189. func (s *Scratch) Decoder() *Decoder {
  190. return &Decoder{
  191. dt: s.dt,
  192. actualTableLog: s.actualTableLog,
  193. bufs: &s.decPool,
  194. }
  195. }
  196. // Decoder provides stateless decoding.
  197. type Decoder struct {
  198. dt dTable
  199. actualTableLog uint8
  200. bufs *sync.Pool
  201. }
  202. func (d *Decoder) buffer() *[4][256]byte {
  203. buf, ok := d.bufs.Get().(*[4][256]byte)
  204. if ok {
  205. return buf
  206. }
  207. return &[4][256]byte{}
  208. }
  209. // decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
  210. // The cap of the output buffer will be the maximum decompressed size.
  211. // The length of the supplied input must match the end of a block exactly.
  212. func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
  213. if d.actualTableLog == 8 {
  214. return d.decompress1X8BitExactly(dst, src)
  215. }
  216. var br bitReaderBytes
  217. err := br.init(src)
  218. if err != nil {
  219. return dst, err
  220. }
  221. maxDecodedSize := cap(dst)
  222. dst = dst[:0]
  223. // Avoid bounds check by always having full sized table.
  224. dt := d.dt.single[:256]
  225. // Use temp table to avoid bound checks/append penalty.
  226. bufs := d.buffer()
  227. buf := &bufs[0]
  228. var off uint8
  229. switch d.actualTableLog {
  230. case 8:
  231. const shift = 0
  232. for br.off >= 4 {
  233. br.fillFast()
  234. v := dt[uint8(br.value>>(56+shift))]
  235. br.advance(uint8(v.entry))
  236. buf[off+0] = uint8(v.entry >> 8)
  237. v = dt[uint8(br.value>>(56+shift))]
  238. br.advance(uint8(v.entry))
  239. buf[off+1] = uint8(v.entry >> 8)
  240. v = dt[uint8(br.value>>(56+shift))]
  241. br.advance(uint8(v.entry))
  242. buf[off+2] = uint8(v.entry >> 8)
  243. v = dt[uint8(br.value>>(56+shift))]
  244. br.advance(uint8(v.entry))
  245. buf[off+3] = uint8(v.entry >> 8)
  246. off += 4
  247. if off == 0 {
  248. if len(dst)+256 > maxDecodedSize {
  249. br.close()
  250. d.bufs.Put(bufs)
  251. return nil, ErrMaxDecodedSizeExceeded
  252. }
  253. dst = append(dst, buf[:]...)
  254. }
  255. }
  256. case 7:
  257. const shift = 8 - 7
  258. for br.off >= 4 {
  259. br.fillFast()
  260. v := dt[uint8(br.value>>(56+shift))]
  261. br.advance(uint8(v.entry))
  262. buf[off+0] = uint8(v.entry >> 8)
  263. v = dt[uint8(br.value>>(56+shift))]
  264. br.advance(uint8(v.entry))
  265. buf[off+1] = uint8(v.entry >> 8)
  266. v = dt[uint8(br.value>>(56+shift))]
  267. br.advance(uint8(v.entry))
  268. buf[off+2] = uint8(v.entry >> 8)
  269. v = dt[uint8(br.value>>(56+shift))]
  270. br.advance(uint8(v.entry))
  271. buf[off+3] = uint8(v.entry >> 8)
  272. off += 4
  273. if off == 0 {
  274. if len(dst)+256 > maxDecodedSize {
  275. br.close()
  276. d.bufs.Put(bufs)
  277. return nil, ErrMaxDecodedSizeExceeded
  278. }
  279. dst = append(dst, buf[:]...)
  280. }
  281. }
  282. case 6:
  283. const shift = 8 - 6
  284. for br.off >= 4 {
  285. br.fillFast()
  286. v := dt[uint8(br.value>>(56+shift))]
  287. br.advance(uint8(v.entry))
  288. buf[off+0] = uint8(v.entry >> 8)
  289. v = dt[uint8(br.value>>(56+shift))]
  290. br.advance(uint8(v.entry))
  291. buf[off+1] = uint8(v.entry >> 8)
  292. v = dt[uint8(br.value>>(56+shift))]
  293. br.advance(uint8(v.entry))
  294. buf[off+2] = uint8(v.entry >> 8)
  295. v = dt[uint8(br.value>>(56+shift))]
  296. br.advance(uint8(v.entry))
  297. buf[off+3] = uint8(v.entry >> 8)
  298. off += 4
  299. if off == 0 {
  300. if len(dst)+256 > maxDecodedSize {
  301. d.bufs.Put(bufs)
  302. br.close()
  303. return nil, ErrMaxDecodedSizeExceeded
  304. }
  305. dst = append(dst, buf[:]...)
  306. }
  307. }
  308. case 5:
  309. const shift = 8 - 5
  310. for br.off >= 4 {
  311. br.fillFast()
  312. v := dt[uint8(br.value>>(56+shift))]
  313. br.advance(uint8(v.entry))
  314. buf[off+0] = uint8(v.entry >> 8)
  315. v = dt[uint8(br.value>>(56+shift))]
  316. br.advance(uint8(v.entry))
  317. buf[off+1] = uint8(v.entry >> 8)
  318. v = dt[uint8(br.value>>(56+shift))]
  319. br.advance(uint8(v.entry))
  320. buf[off+2] = uint8(v.entry >> 8)
  321. v = dt[uint8(br.value>>(56+shift))]
  322. br.advance(uint8(v.entry))
  323. buf[off+3] = uint8(v.entry >> 8)
  324. off += 4
  325. if off == 0 {
  326. if len(dst)+256 > maxDecodedSize {
  327. d.bufs.Put(bufs)
  328. br.close()
  329. return nil, ErrMaxDecodedSizeExceeded
  330. }
  331. dst = append(dst, buf[:]...)
  332. }
  333. }
  334. case 4:
  335. const shift = 8 - 4
  336. for br.off >= 4 {
  337. br.fillFast()
  338. v := dt[uint8(br.value>>(56+shift))]
  339. br.advance(uint8(v.entry))
  340. buf[off+0] = uint8(v.entry >> 8)
  341. v = dt[uint8(br.value>>(56+shift))]
  342. br.advance(uint8(v.entry))
  343. buf[off+1] = uint8(v.entry >> 8)
  344. v = dt[uint8(br.value>>(56+shift))]
  345. br.advance(uint8(v.entry))
  346. buf[off+2] = uint8(v.entry >> 8)
  347. v = dt[uint8(br.value>>(56+shift))]
  348. br.advance(uint8(v.entry))
  349. buf[off+3] = uint8(v.entry >> 8)
  350. off += 4
  351. if off == 0 {
  352. if len(dst)+256 > maxDecodedSize {
  353. d.bufs.Put(bufs)
  354. br.close()
  355. return nil, ErrMaxDecodedSizeExceeded
  356. }
  357. dst = append(dst, buf[:]...)
  358. }
  359. }
  360. case 3:
  361. const shift = 8 - 3
  362. for br.off >= 4 {
  363. br.fillFast()
  364. v := dt[uint8(br.value>>(56+shift))]
  365. br.advance(uint8(v.entry))
  366. buf[off+0] = uint8(v.entry >> 8)
  367. v = dt[uint8(br.value>>(56+shift))]
  368. br.advance(uint8(v.entry))
  369. buf[off+1] = uint8(v.entry >> 8)
  370. v = dt[uint8(br.value>>(56+shift))]
  371. br.advance(uint8(v.entry))
  372. buf[off+2] = uint8(v.entry >> 8)
  373. v = dt[uint8(br.value>>(56+shift))]
  374. br.advance(uint8(v.entry))
  375. buf[off+3] = uint8(v.entry >> 8)
  376. off += 4
  377. if off == 0 {
  378. if len(dst)+256 > maxDecodedSize {
  379. d.bufs.Put(bufs)
  380. br.close()
  381. return nil, ErrMaxDecodedSizeExceeded
  382. }
  383. dst = append(dst, buf[:]...)
  384. }
  385. }
  386. case 2:
  387. const shift = 8 - 2
  388. for br.off >= 4 {
  389. br.fillFast()
  390. v := dt[uint8(br.value>>(56+shift))]
  391. br.advance(uint8(v.entry))
  392. buf[off+0] = uint8(v.entry >> 8)
  393. v = dt[uint8(br.value>>(56+shift))]
  394. br.advance(uint8(v.entry))
  395. buf[off+1] = uint8(v.entry >> 8)
  396. v = dt[uint8(br.value>>(56+shift))]
  397. br.advance(uint8(v.entry))
  398. buf[off+2] = uint8(v.entry >> 8)
  399. v = dt[uint8(br.value>>(56+shift))]
  400. br.advance(uint8(v.entry))
  401. buf[off+3] = uint8(v.entry >> 8)
  402. off += 4
  403. if off == 0 {
  404. if len(dst)+256 > maxDecodedSize {
  405. d.bufs.Put(bufs)
  406. br.close()
  407. return nil, ErrMaxDecodedSizeExceeded
  408. }
  409. dst = append(dst, buf[:]...)
  410. }
  411. }
  412. case 1:
  413. const shift = 8 - 1
  414. for br.off >= 4 {
  415. br.fillFast()
  416. v := dt[uint8(br.value>>(56+shift))]
  417. br.advance(uint8(v.entry))
  418. buf[off+0] = uint8(v.entry >> 8)
  419. v = dt[uint8(br.value>>(56+shift))]
  420. br.advance(uint8(v.entry))
  421. buf[off+1] = uint8(v.entry >> 8)
  422. v = dt[uint8(br.value>>(56+shift))]
  423. br.advance(uint8(v.entry))
  424. buf[off+2] = uint8(v.entry >> 8)
  425. v = dt[uint8(br.value>>(56+shift))]
  426. br.advance(uint8(v.entry))
  427. buf[off+3] = uint8(v.entry >> 8)
  428. off += 4
  429. if off == 0 {
  430. if len(dst)+256 > maxDecodedSize {
  431. d.bufs.Put(bufs)
  432. br.close()
  433. return nil, ErrMaxDecodedSizeExceeded
  434. }
  435. dst = append(dst, buf[:]...)
  436. }
  437. }
  438. default:
  439. d.bufs.Put(bufs)
  440. return nil, fmt.Errorf("invalid tablelog: %d", d.actualTableLog)
  441. }
  442. if len(dst)+int(off) > maxDecodedSize {
  443. d.bufs.Put(bufs)
  444. br.close()
  445. return nil, ErrMaxDecodedSizeExceeded
  446. }
  447. dst = append(dst, buf[:off]...)
  448. // br < 4, so uint8 is fine
  449. bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
  450. shift := (8 - d.actualTableLog) & 7
  451. for bitsLeft > 0 {
  452. if br.bitsRead >= 64-8 {
  453. for br.off > 0 {
  454. br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
  455. br.bitsRead -= 8
  456. br.off--
  457. }
  458. }
  459. if len(dst) >= maxDecodedSize {
  460. br.close()
  461. d.bufs.Put(bufs)
  462. return nil, ErrMaxDecodedSizeExceeded
  463. }
  464. v := dt[br.peekByteFast()>>shift]
  465. nBits := uint8(v.entry)
  466. br.advance(nBits)
  467. bitsLeft -= int8(nBits)
  468. dst = append(dst, uint8(v.entry>>8))
  469. }
  470. d.bufs.Put(bufs)
  471. return dst, br.close()
  472. }
  473. // decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
  474. // The cap of the output buffer will be the maximum decompressed size.
  475. // The length of the supplied input must match the end of a block exactly.
  476. func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
  477. var br bitReaderBytes
  478. err := br.init(src)
  479. if err != nil {
  480. return dst, err
  481. }
  482. maxDecodedSize := cap(dst)
  483. dst = dst[:0]
  484. // Avoid bounds check by always having full sized table.
  485. dt := d.dt.single[:256]
  486. // Use temp table to avoid bound checks/append penalty.
  487. bufs := d.buffer()
  488. buf := &bufs[0]
  489. var off uint8
  490. const shift = 56
  491. //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
  492. for br.off >= 4 {
  493. br.fillFast()
  494. v := dt[uint8(br.value>>shift)]
  495. br.advance(uint8(v.entry))
  496. buf[off+0] = uint8(v.entry >> 8)
  497. v = dt[uint8(br.value>>shift)]
  498. br.advance(uint8(v.entry))
  499. buf[off+1] = uint8(v.entry >> 8)
  500. v = dt[uint8(br.value>>shift)]
  501. br.advance(uint8(v.entry))
  502. buf[off+2] = uint8(v.entry >> 8)
  503. v = dt[uint8(br.value>>shift)]
  504. br.advance(uint8(v.entry))
  505. buf[off+3] = uint8(v.entry >> 8)
  506. off += 4
  507. if off == 0 {
  508. if len(dst)+256 > maxDecodedSize {
  509. d.bufs.Put(bufs)
  510. br.close()
  511. return nil, ErrMaxDecodedSizeExceeded
  512. }
  513. dst = append(dst, buf[:]...)
  514. }
  515. }
  516. if len(dst)+int(off) > maxDecodedSize {
  517. d.bufs.Put(bufs)
  518. br.close()
  519. return nil, ErrMaxDecodedSizeExceeded
  520. }
  521. dst = append(dst, buf[:off]...)
  522. // br < 4, so uint8 is fine
  523. bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
  524. for bitsLeft > 0 {
  525. if br.bitsRead >= 64-8 {
  526. for br.off > 0 {
  527. br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
  528. br.bitsRead -= 8
  529. br.off--
  530. }
  531. }
  532. if len(dst) >= maxDecodedSize {
  533. d.bufs.Put(bufs)
  534. br.close()
  535. return nil, ErrMaxDecodedSizeExceeded
  536. }
  537. v := dt[br.peekByteFast()]
  538. nBits := uint8(v.entry)
  539. br.advance(nBits)
  540. bitsLeft -= int8(nBits)
  541. dst = append(dst, uint8(v.entry>>8))
  542. }
  543. d.bufs.Put(bufs)
  544. return dst, br.close()
  545. }
  546. // Decompress4X will decompress a 4X encoded stream.
  547. // The length of the supplied input must match the end of a block exactly.
  548. // The *capacity* of the dst slice must match the destination size of
  549. // the uncompressed data exactly.
  550. func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
  551. if d.actualTableLog == 8 {
  552. return d.decompress4X8bitExactly(dst, src)
  553. }
  554. var br [4]bitReaderBytes
  555. start := 6
  556. for i := 0; i < 3; i++ {
  557. length := int(src[i*2]) | (int(src[i*2+1]) << 8)
  558. if start+length >= len(src) {
  559. return nil, errors.New("truncated input (or invalid offset)")
  560. }
  561. err := br[i].init(src[start : start+length])
  562. if err != nil {
  563. return nil, err
  564. }
  565. start += length
  566. }
  567. err := br[3].init(src[start:])
  568. if err != nil {
  569. return nil, err
  570. }
  571. // destination, offset to match first output
  572. dstSize := cap(dst)
  573. dst = dst[:dstSize]
  574. out := dst
  575. dstEvery := (dstSize + 3) / 4
  576. shift := (56 + (8 - d.actualTableLog)) & 63
  577. const tlSize = 1 << 8
  578. single := d.dt.single[:tlSize]
  579. // Use temp table to avoid bound checks/append penalty.
  580. buf := d.buffer()
  581. var off uint8
  582. var decoded int
  583. // Decode 4 values from each decoder/loop.
  584. const bufoff = 256
  585. for {
  586. if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
  587. break
  588. }
  589. {
  590. // Interleave 2 decodes.
  591. const stream = 0
  592. const stream2 = 1
  593. br1 := &br[stream]
  594. br2 := &br[stream2]
  595. br1.fillFast()
  596. br2.fillFast()
  597. v := single[uint8(br1.value>>shift)].entry
  598. v2 := single[uint8(br2.value>>shift)].entry
  599. br1.bitsRead += uint8(v)
  600. br1.value <<= v & 63
  601. br2.bitsRead += uint8(v2)
  602. br2.value <<= v2 & 63
  603. buf[stream][off] = uint8(v >> 8)
  604. buf[stream2][off] = uint8(v2 >> 8)
  605. v = single[uint8(br1.value>>shift)].entry
  606. v2 = single[uint8(br2.value>>shift)].entry
  607. br1.bitsRead += uint8(v)
  608. br1.value <<= v & 63
  609. br2.bitsRead += uint8(v2)
  610. br2.value <<= v2 & 63
  611. buf[stream][off+1] = uint8(v >> 8)
  612. buf[stream2][off+1] = uint8(v2 >> 8)
  613. v = single[uint8(br1.value>>shift)].entry
  614. v2 = single[uint8(br2.value>>shift)].entry
  615. br1.bitsRead += uint8(v)
  616. br1.value <<= v & 63
  617. br2.bitsRead += uint8(v2)
  618. br2.value <<= v2 & 63
  619. buf[stream][off+2] = uint8(v >> 8)
  620. buf[stream2][off+2] = uint8(v2 >> 8)
  621. v = single[uint8(br1.value>>shift)].entry
  622. v2 = single[uint8(br2.value>>shift)].entry
  623. br1.bitsRead += uint8(v)
  624. br1.value <<= v & 63
  625. br2.bitsRead += uint8(v2)
  626. br2.value <<= v2 & 63
  627. buf[stream][off+3] = uint8(v >> 8)
  628. buf[stream2][off+3] = uint8(v2 >> 8)
  629. }
  630. {
  631. const stream = 2
  632. const stream2 = 3
  633. br1 := &br[stream]
  634. br2 := &br[stream2]
  635. br1.fillFast()
  636. br2.fillFast()
  637. v := single[uint8(br1.value>>shift)].entry
  638. v2 := single[uint8(br2.value>>shift)].entry
  639. br1.bitsRead += uint8(v)
  640. br1.value <<= v & 63
  641. br2.bitsRead += uint8(v2)
  642. br2.value <<= v2 & 63
  643. buf[stream][off] = uint8(v >> 8)
  644. buf[stream2][off] = uint8(v2 >> 8)
  645. v = single[uint8(br1.value>>shift)].entry
  646. v2 = single[uint8(br2.value>>shift)].entry
  647. br1.bitsRead += uint8(v)
  648. br1.value <<= v & 63
  649. br2.bitsRead += uint8(v2)
  650. br2.value <<= v2 & 63
  651. buf[stream][off+1] = uint8(v >> 8)
  652. buf[stream2][off+1] = uint8(v2 >> 8)
  653. v = single[uint8(br1.value>>shift)].entry
  654. v2 = single[uint8(br2.value>>shift)].entry
  655. br1.bitsRead += uint8(v)
  656. br1.value <<= v & 63
  657. br2.bitsRead += uint8(v2)
  658. br2.value <<= v2 & 63
  659. buf[stream][off+2] = uint8(v >> 8)
  660. buf[stream2][off+2] = uint8(v2 >> 8)
  661. v = single[uint8(br1.value>>shift)].entry
  662. v2 = single[uint8(br2.value>>shift)].entry
  663. br1.bitsRead += uint8(v)
  664. br1.value <<= v & 63
  665. br2.bitsRead += uint8(v2)
  666. br2.value <<= v2 & 63
  667. buf[stream][off+3] = uint8(v >> 8)
  668. buf[stream2][off+3] = uint8(v2 >> 8)
  669. }
  670. off += 4
  671. if off == 0 {
  672. if bufoff > dstEvery {
  673. d.bufs.Put(buf)
  674. return nil, errors.New("corruption detected: stream overrun 1")
  675. }
  676. // There must at least be 3 buffers left.
  677. if len(out)-bufoff < dstEvery*3 {
  678. d.bufs.Put(buf)
  679. return nil, errors.New("corruption detected: stream overrun 2")
  680. }
  681. //copy(out, buf[0][:])
  682. //copy(out[dstEvery:], buf[1][:])
  683. //copy(out[dstEvery*2:], buf[2][:])
  684. *(*[bufoff]byte)(out) = buf[0]
  685. *(*[bufoff]byte)(out[dstEvery:]) = buf[1]
  686. *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
  687. *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
  688. out = out[bufoff:]
  689. decoded += bufoff * 4
  690. }
  691. }
  692. if off > 0 {
  693. ioff := int(off)
  694. if len(out) < dstEvery*3+ioff {
  695. d.bufs.Put(buf)
  696. return nil, errors.New("corruption detected: stream overrun 3")
  697. }
  698. copy(out, buf[0][:off])
  699. copy(out[dstEvery:], buf[1][:off])
  700. copy(out[dstEvery*2:], buf[2][:off])
  701. copy(out[dstEvery*3:], buf[3][:off])
  702. decoded += int(off) * 4
  703. out = out[off:]
  704. }
  705. // Decode remaining.
  706. // Decode remaining.
  707. remainBytes := dstEvery - (decoded / 4)
  708. for i := range br {
  709. offset := dstEvery * i
  710. endsAt := offset + remainBytes
  711. if endsAt > len(out) {
  712. endsAt = len(out)
  713. }
  714. br := &br[i]
  715. bitsLeft := br.remaining()
  716. for bitsLeft > 0 {
  717. if br.finished() {
  718. d.bufs.Put(buf)
  719. return nil, io.ErrUnexpectedEOF
  720. }
  721. if br.bitsRead >= 56 {
  722. if br.off >= 4 {
  723. v := br.in[br.off-4:]
  724. v = v[:4]
  725. low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
  726. br.value |= uint64(low) << (br.bitsRead - 32)
  727. br.bitsRead -= 32
  728. br.off -= 4
  729. } else {
  730. for br.off > 0 {
  731. br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
  732. br.bitsRead -= 8
  733. br.off--
  734. }
  735. }
  736. }
  737. // end inline...
  738. if offset >= endsAt {
  739. d.bufs.Put(buf)
  740. return nil, errors.New("corruption detected: stream overrun 4")
  741. }
  742. // Read value and increment offset.
  743. v := single[uint8(br.value>>shift)].entry
  744. nBits := uint8(v)
  745. br.advance(nBits)
  746. bitsLeft -= uint(nBits)
  747. out[offset] = uint8(v >> 8)
  748. offset++
  749. }
  750. if offset != endsAt {
  751. d.bufs.Put(buf)
  752. return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
  753. }
  754. decoded += offset - dstEvery*i
  755. err = br.close()
  756. if err != nil {
  757. d.bufs.Put(buf)
  758. return nil, err
  759. }
  760. }
  761. d.bufs.Put(buf)
  762. if dstSize != decoded {
  763. return nil, errors.New("corruption detected: short output block")
  764. }
  765. return dst, nil
  766. }
  767. // Decompress4X will decompress a 4X encoded stream.
  768. // The length of the supplied input must match the end of a block exactly.
  769. // The *capacity* of the dst slice must match the destination size of
  770. // the uncompressed data exactly.
  771. func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
  772. var br [4]bitReaderBytes
  773. start := 6
  774. for i := 0; i < 3; i++ {
  775. length := int(src[i*2]) | (int(src[i*2+1]) << 8)
  776. if start+length >= len(src) {
  777. return nil, errors.New("truncated input (or invalid offset)")
  778. }
  779. err := br[i].init(src[start : start+length])
  780. if err != nil {
  781. return nil, err
  782. }
  783. start += length
  784. }
  785. err := br[3].init(src[start:])
  786. if err != nil {
  787. return nil, err
  788. }
  789. // destination, offset to match first output
  790. dstSize := cap(dst)
  791. dst = dst[:dstSize]
  792. out := dst
  793. dstEvery := (dstSize + 3) / 4
  794. const shift = 56
  795. const tlSize = 1 << 8
  796. single := d.dt.single[:tlSize]
  797. // Use temp table to avoid bound checks/append penalty.
  798. buf := d.buffer()
  799. var off uint8
  800. var decoded int
  801. // Decode 4 values from each decoder/loop.
  802. const bufoff = 256
  803. for {
  804. if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
  805. break
  806. }
  807. {
  808. // Interleave 2 decodes.
  809. const stream = 0
  810. const stream2 = 1
  811. br1 := &br[stream]
  812. br2 := &br[stream2]
  813. br1.fillFast()
  814. br2.fillFast()
  815. v := single[uint8(br1.value>>shift)].entry
  816. v2 := single[uint8(br2.value>>shift)].entry
  817. br1.bitsRead += uint8(v)
  818. br1.value <<= v & 63
  819. br2.bitsRead += uint8(v2)
  820. br2.value <<= v2 & 63
  821. buf[stream][off] = uint8(v >> 8)
  822. buf[stream2][off] = uint8(v2 >> 8)
  823. v = single[uint8(br1.value>>shift)].entry
  824. v2 = single[uint8(br2.value>>shift)].entry
  825. br1.bitsRead += uint8(v)
  826. br1.value <<= v & 63
  827. br2.bitsRead += uint8(v2)
  828. br2.value <<= v2 & 63
  829. buf[stream][off+1] = uint8(v >> 8)
  830. buf[stream2][off+1] = uint8(v2 >> 8)
  831. v = single[uint8(br1.value>>shift)].entry
  832. v2 = single[uint8(br2.value>>shift)].entry
  833. br1.bitsRead += uint8(v)
  834. br1.value <<= v & 63
  835. br2.bitsRead += uint8(v2)
  836. br2.value <<= v2 & 63
  837. buf[stream][off+2] = uint8(v >> 8)
  838. buf[stream2][off+2] = uint8(v2 >> 8)
  839. v = single[uint8(br1.value>>shift)].entry
  840. v2 = single[uint8(br2.value>>shift)].entry
  841. br1.bitsRead += uint8(v)
  842. br1.value <<= v & 63
  843. br2.bitsRead += uint8(v2)
  844. br2.value <<= v2 & 63
  845. buf[stream][off+3] = uint8(v >> 8)
  846. buf[stream2][off+3] = uint8(v2 >> 8)
  847. }
  848. {
  849. const stream = 2
  850. const stream2 = 3
  851. br1 := &br[stream]
  852. br2 := &br[stream2]
  853. br1.fillFast()
  854. br2.fillFast()
  855. v := single[uint8(br1.value>>shift)].entry
  856. v2 := single[uint8(br2.value>>shift)].entry
  857. br1.bitsRead += uint8(v)
  858. br1.value <<= v & 63
  859. br2.bitsRead += uint8(v2)
  860. br2.value <<= v2 & 63
  861. buf[stream][off] = uint8(v >> 8)
  862. buf[stream2][off] = uint8(v2 >> 8)
  863. v = single[uint8(br1.value>>shift)].entry
  864. v2 = single[uint8(br2.value>>shift)].entry
  865. br1.bitsRead += uint8(v)
  866. br1.value <<= v & 63
  867. br2.bitsRead += uint8(v2)
  868. br2.value <<= v2 & 63
  869. buf[stream][off+1] = uint8(v >> 8)
  870. buf[stream2][off+1] = uint8(v2 >> 8)
  871. v = single[uint8(br1.value>>shift)].entry
  872. v2 = single[uint8(br2.value>>shift)].entry
  873. br1.bitsRead += uint8(v)
  874. br1.value <<= v & 63
  875. br2.bitsRead += uint8(v2)
  876. br2.value <<= v2 & 63
  877. buf[stream][off+2] = uint8(v >> 8)
  878. buf[stream2][off+2] = uint8(v2 >> 8)
  879. v = single[uint8(br1.value>>shift)].entry
  880. v2 = single[uint8(br2.value>>shift)].entry
  881. br1.bitsRead += uint8(v)
  882. br1.value <<= v & 63
  883. br2.bitsRead += uint8(v2)
  884. br2.value <<= v2 & 63
  885. buf[stream][off+3] = uint8(v >> 8)
  886. buf[stream2][off+3] = uint8(v2 >> 8)
  887. }
  888. off += 4
  889. if off == 0 {
  890. if bufoff > dstEvery {
  891. d.bufs.Put(buf)
  892. return nil, errors.New("corruption detected: stream overrun 1")
  893. }
  894. // There must at least be 3 buffers left.
  895. if len(out)-bufoff < dstEvery*3 {
  896. d.bufs.Put(buf)
  897. return nil, errors.New("corruption detected: stream overrun 2")
  898. }
  899. //copy(out, buf[0][:])
  900. //copy(out[dstEvery:], buf[1][:])
  901. //copy(out[dstEvery*2:], buf[2][:])
  902. // copy(out[dstEvery*3:], buf[3][:])
  903. *(*[bufoff]byte)(out) = buf[0]
  904. *(*[bufoff]byte)(out[dstEvery:]) = buf[1]
  905. *(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
  906. *(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
  907. out = out[bufoff:]
  908. decoded += bufoff * 4
  909. }
  910. }
  911. if off > 0 {
  912. ioff := int(off)
  913. if len(out) < dstEvery*3+ioff {
  914. return nil, errors.New("corruption detected: stream overrun 3")
  915. }
  916. copy(out, buf[0][:off])
  917. copy(out[dstEvery:], buf[1][:off])
  918. copy(out[dstEvery*2:], buf[2][:off])
  919. copy(out[dstEvery*3:], buf[3][:off])
  920. decoded += int(off) * 4
  921. out = out[off:]
  922. }
  923. // Decode remaining.
  924. remainBytes := dstEvery - (decoded / 4)
  925. for i := range br {
  926. offset := dstEvery * i
  927. endsAt := offset + remainBytes
  928. if endsAt > len(out) {
  929. endsAt = len(out)
  930. }
  931. br := &br[i]
  932. bitsLeft := br.remaining()
  933. for bitsLeft > 0 {
  934. if br.finished() {
  935. d.bufs.Put(buf)
  936. return nil, io.ErrUnexpectedEOF
  937. }
  938. if br.bitsRead >= 56 {
  939. if br.off >= 4 {
  940. v := br.in[br.off-4:]
  941. v = v[:4]
  942. low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
  943. br.value |= uint64(low) << (br.bitsRead - 32)
  944. br.bitsRead -= 32
  945. br.off -= 4
  946. } else {
  947. for br.off > 0 {
  948. br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
  949. br.bitsRead -= 8
  950. br.off--
  951. }
  952. }
  953. }
  954. // end inline...
  955. if offset >= endsAt {
  956. d.bufs.Put(buf)
  957. return nil, errors.New("corruption detected: stream overrun 4")
  958. }
  959. // Read value and increment offset.
  960. v := single[br.peekByteFast()].entry
  961. nBits := uint8(v)
  962. br.advance(nBits)
  963. bitsLeft -= uint(nBits)
  964. out[offset] = uint8(v >> 8)
  965. offset++
  966. }
  967. if offset != endsAt {
  968. d.bufs.Put(buf)
  969. return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
  970. }
  971. decoded += offset - dstEvery*i
  972. err = br.close()
  973. if err != nil {
  974. d.bufs.Put(buf)
  975. return nil, err
  976. }
  977. }
  978. d.bufs.Put(buf)
  979. if dstSize != decoded {
  980. return nil, errors.New("corruption detected: short output block")
  981. }
  982. return dst, nil
  983. }
  984. // matches will compare a decoding table to a coding table.
  985. // Errors are written to the writer.
  986. // Nothing will be written if table is ok.
  987. func (s *Scratch) matches(ct cTable, w io.Writer) {
  988. if s == nil || len(s.dt.single) == 0 {
  989. return
  990. }
  991. dt := s.dt.single[:1<<s.actualTableLog]
  992. tablelog := s.actualTableLog
  993. ok := 0
  994. broken := 0
  995. for sym, enc := range ct {
  996. errs := 0
  997. broken++
  998. if enc.nBits == 0 {
  999. for _, dec := range dt {
  1000. if uint8(dec.entry>>8) == byte(sym) {
  1001. fmt.Fprintf(w, "symbol %x has decoder, but no encoder\n", sym)
  1002. errs++
  1003. break
  1004. }
  1005. }
  1006. if errs == 0 {
  1007. broken--
  1008. }
  1009. continue
  1010. }
  1011. // Unused bits in input
  1012. ub := tablelog - enc.nBits
  1013. top := enc.val << ub
  1014. // decoder looks at top bits.
  1015. dec := dt[top]
  1016. if uint8(dec.entry) != enc.nBits {
  1017. fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", sym, enc.nBits, uint8(dec.entry))
  1018. errs++
  1019. }
  1020. if uint8(dec.entry>>8) != uint8(sym) {
  1021. fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", sym, sym, uint8(dec.entry>>8))
  1022. errs++
  1023. }
  1024. if errs > 0 {
  1025. fmt.Fprintf(w, "%d errros in base, stopping\n", errs)
  1026. continue
  1027. }
  1028. // Ensure that all combinations are covered.
  1029. for i := uint16(0); i < (1 << ub); i++ {
  1030. vval := top | i
  1031. dec := dt[vval]
  1032. if uint8(dec.entry) != enc.nBits {
  1033. fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", vval, enc.nBits, uint8(dec.entry))
  1034. errs++
  1035. }
  1036. if uint8(dec.entry>>8) != uint8(sym) {
  1037. fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", vval, sym, uint8(dec.entry>>8))
  1038. errs++
  1039. }
  1040. if errs > 20 {
  1041. fmt.Fprintf(w, "%d errros, stopping\n", errs)
  1042. break
  1043. }
  1044. }
  1045. if errs == 0 {
  1046. ok++
  1047. broken--
  1048. }
  1049. }
  1050. if broken > 0 {
  1051. fmt.Fprintf(w, "%d broken, %d ok\n", broken, ok)
  1052. }
  1053. }