// Code generated by command: go run gen_fse.go -out ../fse_decoder_amd64.s -pkg=zstd. DO NOT EDIT.

//go:build !appengine && !noasm && gc && !noasm

  3. // func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
  4. TEXT ·buildDtable_asm(SB), $0-24
  5. MOVQ ctx+8(FP), CX
  6. MOVQ s+0(FP), DI
  7. // Load values
  8. MOVBQZX 4098(DI), DX
  9. XORQ AX, AX
  10. BTSQ DX, AX
  11. MOVQ (CX), BX
  12. MOVQ 16(CX), SI
  13. LEAQ -1(AX), R8
  14. MOVQ 8(CX), CX
  15. MOVWQZX 4096(DI), DI
  16. // End load values
  17. // Init, lay down lowprob symbols
  18. XORQ R9, R9
  19. JMP init_main_loop_condition
  20. init_main_loop:
  21. MOVWQSX (CX)(R9*2), R10
  22. CMPW R10, $-1
  23. JNE do_not_update_high_threshold
  24. MOVB R9, 1(SI)(R8*8)
  25. DECQ R8
  26. MOVQ $0x0000000000000001, R10
  27. do_not_update_high_threshold:
  28. MOVW R10, (BX)(R9*2)
  29. INCQ R9
  30. init_main_loop_condition:
  31. CMPQ R9, DI
  32. JL init_main_loop
  33. // Spread symbols
  34. // Calculate table step
  35. MOVQ AX, R9
  36. SHRQ $0x01, R9
  37. MOVQ AX, R10
  38. SHRQ $0x03, R10
  39. LEAQ 3(R9)(R10*1), R9
  40. // Fill add bits values
  41. LEAQ -1(AX), R10
  42. XORQ R11, R11
  43. XORQ R12, R12
  44. JMP spread_main_loop_condition
  45. spread_main_loop:
  46. XORQ R13, R13
  47. MOVWQSX (CX)(R12*2), R14
  48. JMP spread_inner_loop_condition
  49. spread_inner_loop:
  50. MOVB R12, 1(SI)(R11*8)
  51. adjust_position:
  52. ADDQ R9, R11
  53. ANDQ R10, R11
  54. CMPQ R11, R8
  55. JG adjust_position
  56. INCQ R13
  57. spread_inner_loop_condition:
  58. CMPQ R13, R14
  59. JL spread_inner_loop
  60. INCQ R12
  61. spread_main_loop_condition:
  62. CMPQ R12, DI
  63. JL spread_main_loop
  64. TESTQ R11, R11
  65. JZ spread_check_ok
  66. MOVQ ctx+8(FP), AX
  67. MOVQ R11, 24(AX)
  68. MOVQ $+1, ret+16(FP)
  69. RET
  70. spread_check_ok:
  71. // Build Decoding table
  72. XORQ DI, DI
  73. build_table_main_table:
  74. MOVBQZX 1(SI)(DI*8), CX
  75. MOVWQZX (BX)(CX*2), R8
  76. LEAQ 1(R8), R9
  77. MOVW R9, (BX)(CX*2)
  78. MOVQ R8, R9
  79. BSRQ R9, R9
  80. MOVQ DX, CX
  81. SUBQ R9, CX
  82. SHLQ CL, R8
  83. SUBQ AX, R8
  84. MOVB CL, (SI)(DI*8)
  85. MOVW R8, 2(SI)(DI*8)
  86. CMPQ R8, AX
  87. JLE build_table_check1_ok
  88. MOVQ ctx+8(FP), CX
  89. MOVQ R8, 24(CX)
  90. MOVQ AX, 32(CX)
  91. MOVQ $+2, ret+16(FP)
  92. RET
  93. build_table_check1_ok:
  94. TESTB CL, CL
  95. JNZ build_table_check2_ok
  96. CMPW R8, DI
  97. JNE build_table_check2_ok
  98. MOVQ ctx+8(FP), AX
  99. MOVQ R8, 24(AX)
  100. MOVQ DI, 32(AX)
  101. MOVQ $+3, ret+16(FP)
  102. RET
  103. build_table_check2_ok:
  104. INCQ DI
  105. CMPQ DI, AX
  106. JL build_table_main_table
  107. MOVQ $+0, ret+16(FP)
  108. RET