match.go 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. // Package match provides a simple pattern matcher with unicode support.
  2. package match
  3. import (
  4. "unicode/utf8"
  5. )
  6. // Match returns true if str matches pattern. This is a very
  7. // simple wildcard match where '*' matches on any number characters
  8. // and '?' matches on any one character.
  9. //
  10. // pattern:
  11. // { term }
  12. // term:
  13. // '*' matches any sequence of non-Separator characters
  14. // '?' matches any single non-Separator character
  15. // c matches character c (c != '*', '?', '\\')
  16. // '\\' c matches character c
  17. //
  18. func Match(str, pattern string) bool {
  19. if pattern == "*" {
  20. return true
  21. }
  22. return match(str, pattern, 0, nil, -1) == rMatch
  23. }
  24. // MatchLimit is the same as Match but will limit the complexity of the match
  25. // operation. This is to avoid long running matches, specifically to avoid ReDos
  26. // attacks from arbritary inputs.
  27. //
  28. // How it works:
  29. // The underlying match routine is recursive and may call itself when it
  30. // encounters a sandwiched wildcard pattern, such as: `user:*:name`.
  31. // Everytime it calls itself a counter is incremented.
  32. // The operation is stopped when counter > maxcomp*len(str).
  33. func MatchLimit(str, pattern string, maxcomp int) (matched, stopped bool) {
  34. if pattern == "*" {
  35. return true, false
  36. }
  37. counter := 0
  38. r := match(str, pattern, len(str), &counter, maxcomp)
  39. if r == rStop {
  40. return false, true
  41. }
  42. return r == rMatch, false
  43. }
  44. type result int
  45. const (
  46. rNoMatch result = iota
  47. rMatch
  48. rStop
  49. )
  50. func match(str, pat string, slen int, counter *int, maxcomp int) result {
  51. // check complexity limit
  52. if maxcomp > -1 {
  53. if *counter > slen*maxcomp {
  54. return rStop
  55. }
  56. *counter++
  57. }
  58. for len(pat) > 0 {
  59. var wild bool
  60. pc, ps := rune(pat[0]), 1
  61. if pc > 0x7f {
  62. pc, ps = utf8.DecodeRuneInString(pat)
  63. }
  64. var sc rune
  65. var ss int
  66. if len(str) > 0 {
  67. sc, ss = rune(str[0]), 1
  68. if sc > 0x7f {
  69. sc, ss = utf8.DecodeRuneInString(str)
  70. }
  71. }
  72. switch pc {
  73. case '?':
  74. if ss == 0 {
  75. return rNoMatch
  76. }
  77. case '*':
  78. // Ignore repeating stars.
  79. for len(pat) > 1 && pat[1] == '*' {
  80. pat = pat[1:]
  81. }
  82. // If this star is the last character then it must be a match.
  83. if len(pat) == 1 {
  84. return rMatch
  85. }
  86. // Match and trim any non-wildcard suffix characters.
  87. var ok bool
  88. str, pat, ok = matchTrimSuffix(str, pat)
  89. if !ok {
  90. return rNoMatch
  91. }
  92. // Check for single star again.
  93. if len(pat) == 1 {
  94. return rMatch
  95. }
  96. // Perform recursive wildcard search.
  97. r := match(str, pat[1:], slen, counter, maxcomp)
  98. if r != rNoMatch {
  99. return r
  100. }
  101. if len(str) == 0 {
  102. return rNoMatch
  103. }
  104. wild = true
  105. default:
  106. if ss == 0 {
  107. return rNoMatch
  108. }
  109. if pc == '\\' {
  110. pat = pat[ps:]
  111. pc, ps = utf8.DecodeRuneInString(pat)
  112. if ps == 0 {
  113. return rNoMatch
  114. }
  115. }
  116. if sc != pc {
  117. return rNoMatch
  118. }
  119. }
  120. str = str[ss:]
  121. if !wild {
  122. pat = pat[ps:]
  123. }
  124. }
  125. if len(str) == 0 {
  126. return rMatch
  127. }
  128. return rNoMatch
  129. }
  130. // matchTrimSuffix matches and trims any non-wildcard suffix characters.
  131. // Returns the trimed string and pattern.
  132. //
  133. // This is called because the pattern contains extra data after the wildcard
  134. // star. Here we compare any suffix characters in the pattern to the suffix of
  135. // the target string. Basically a reverse match that stops when a wildcard
  136. // character is reached. This is a little trickier than a forward match because
  137. // we need to evaluate an escaped character in reverse.
  138. //
  139. // Any matched characters will be trimmed from both the target
  140. // string and the pattern.
  141. func matchTrimSuffix(str, pat string) (string, string, bool) {
  142. // It's expected that the pattern has at least two bytes and the first byte
  143. // is a wildcard star '*'
  144. match := true
  145. for len(str) > 0 && len(pat) > 1 {
  146. pc, ps := utf8.DecodeLastRuneInString(pat)
  147. var esc bool
  148. for i := 0; ; i++ {
  149. if pat[len(pat)-ps-i-1] != '\\' {
  150. if i&1 == 1 {
  151. esc = true
  152. ps++
  153. }
  154. break
  155. }
  156. }
  157. if pc == '*' && !esc {
  158. match = true
  159. break
  160. }
  161. sc, ss := utf8.DecodeLastRuneInString(str)
  162. if !((pc == '?' && !esc) || pc == sc) {
  163. match = false
  164. break
  165. }
  166. str = str[:len(str)-ss]
  167. pat = pat[:len(pat)-ps]
  168. }
  169. return str, pat, match
  170. }
  171. var maxRuneBytes = [...]byte{244, 143, 191, 191}
  172. // Allowable parses the pattern and determines the minimum and maximum allowable
  173. // values that the pattern can represent.
  174. // When the max cannot be determined, 'true' will be returned
  175. // for infinite.
  176. func Allowable(pattern string) (min, max string) {
  177. if pattern == "" || pattern[0] == '*' {
  178. return "", ""
  179. }
  180. minb := make([]byte, 0, len(pattern))
  181. maxb := make([]byte, 0, len(pattern))
  182. var wild bool
  183. for i := 0; i < len(pattern); i++ {
  184. if pattern[i] == '*' {
  185. wild = true
  186. break
  187. }
  188. if pattern[i] == '?' {
  189. minb = append(minb, 0)
  190. maxb = append(maxb, maxRuneBytes[:]...)
  191. } else {
  192. minb = append(minb, pattern[i])
  193. maxb = append(maxb, pattern[i])
  194. }
  195. }
  196. if wild {
  197. r, n := utf8.DecodeLastRune(maxb)
  198. if r != utf8.RuneError {
  199. if r < utf8.MaxRune {
  200. r++
  201. if r > 0x7f {
  202. b := make([]byte, 4)
  203. nn := utf8.EncodeRune(b, r)
  204. maxb = append(maxb[:len(maxb)-n], b[:nn]...)
  205. } else {
  206. maxb = append(maxb[:len(maxb)-n], byte(r))
  207. }
  208. }
  209. }
  210. }
  211. return string(minb), string(maxb)
  212. }
  213. // IsPattern returns true if the string is a pattern.
  214. func IsPattern(str string) bool {
  215. for i := 0; i < len(str); i++ {
  216. if str[i] == '*' || str[i] == '?' {
  217. return true
  218. }
  219. }
  220. return false
  221. }