tct-main.go 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. package controllers_scraper
  2. import (
  3. "encoding/json"
  4. e "github.com/dabory/abango-rest/etc"
  5. "kkscrap-go/controllers/scraper/cafe24"
  6. "kkscrap-go/controllers/scraper/dabory"
  7. "kkscrap-go/controllers/scraper/godo"
  8. "kkscrap-go/controllers/scraper/magento"
  9. "kkscrap-go/controllers/scraper/shopify"
  10. "kkscrap-go/controllers/scraper/util"
  11. "kkscrap-go/controllers/scraper/wordpress"
  12. "kkscrap-go/controllers/scraper/young"
  13. "kkscrap-go/locals"
  14. "kkscrap-go/model"
  15. "net/http"
  16. "net/url"
  17. "regexp"
  18. "strings"
  19. // "golang.org/x/crypto/bcrypt"
  20. "github.com/labstack/echo"
  21. )
  22. type SolutionTypeGetReq struct {
  23. Url string
  24. }
  25. func SolutionTypeGet(c echo.Context) error {
  26. v := c.Get("receiver").(SolutionTypeGetReq)
  27. retv := &struct {
  28. SolutionType string
  29. ThemeType string
  30. }{}
  31. body, err := util.Get(v.Url)
  32. if err != nil {
  33. return c.JSON(404, err.Error())
  34. }
  35. // fmt.Println(v.Url)
  36. st, theme, err := getSolutionType(body)
  37. if err != nil {
  38. return c.JSONBlob(http.StatusOK, []byte(err.Error()))
  39. }
  40. retv.SolutionType = string(st)
  41. retv.ThemeType = theme
  42. return c.JSON(http.StatusOK, retv)
  43. }
  44. type ProductPageGetReq struct {
  45. SolutionType model.SolutionType
  46. ThemeType string
  47. Products []ProductUri
  48. }
  49. type ProductUri struct {
  50. Uri string
  51. }
  52. // 오리지널 웹사이트 전체를 업테이트 하는 경우 Uri를 하나씩 보내면 비효율적이므로 하나의 배치로
  53. // 묶어서 요청할 수 있도록 한다. 주로 product-page-get를 쓰고 item-url-scrap은 deprecate 예정임.
  54. func ProductPageGet(c echo.Context) error {
  55. v := c.Get("receiver").(ProductPageGetReq)
  56. var vRet locals.ProductPage // Row(개별레코드)->Page(Row의 집합)->Book(Page의 집합)의 개념
  57. for _, p := range v.Products {
  58. body, err := util.Get(p.Uri)
  59. if err != nil {
  60. e.ErrLog(e.FuncRun("03uoaiuor0", e.CurrFuncName()), err)
  61. }
  62. // 전체 웹사이트가 아니라 개별 상품페이지(1개페이지)의 경우 SolutionType 없이 request됨
  63. if v.SolutionType == "" {
  64. var err error
  65. v.SolutionType, v.ThemeType, err = getSolutionType(body)
  66. if err != nil {
  67. return c.String(705, "Solution Type Not Found:"+err.Error())
  68. }
  69. }
  70. prodInfo := parseSolution(v.SolutionType, v.ThemeType, p.Uri, body)
  71. vRet.ProductPage = append(vRet.ProductPage, toProductPage(prodInfo))
  72. }
  73. // ret, _ := json.MarshalIndent(itemInfo, "", "\t")
  74. ret, _ := json.Marshal(vRet)
  75. return c.JSONBlob(http.StatusOK, ret)
  76. }
  77. func toProductPage(info model.ItemInfo) locals.Product {
  78. ret := locals.Product{
  79. SolutionName: locals.SolutionType(info.SolutionName),
  80. Version: info.Version,
  81. Emails: info.Emails,
  82. DomainName: info.DomainName,
  83. DomainURI: info.DomainURI,
  84. ItemName: info.ItemName,
  85. ItemNick: info.ItemNick,
  86. ModelName: info.ModelName,
  87. ModelNo: info.ModelNo,
  88. BrandName: info.BrandName,
  89. Sku: info.Sku,
  90. ItemCategory: info.ItemCategory,
  91. Manufacturer: info.Manufacturer,
  92. Origin: info.Origin,
  93. Language: info.Language,
  94. Currency: info.Currency,
  95. SalesPrice: info.SalesPrice,
  96. DeliveryPrice: info.DeliveryPrice,
  97. MinimumQty: info.MinimumQty,
  98. UserCredit: info.UserCredit,
  99. Options: nil,
  100. Images: info.Images,
  101. ShortDesc: info.ShortDesc,
  102. OriginDesc: info.OriginDesc,
  103. TextDesc: info.TextDesc,
  104. }
  105. for i, v := range info.Options {
  106. ret.Options = append(ret.Options, locals.Option{
  107. Name: v.Name,
  108. })
  109. ret.Options[i].Choices = make([]locals.Choice, 0)
  110. for _, choice := range v.Choices {
  111. ret.Options[i].Choices = append(ret.Options[i].Choices, locals.Choice{
  112. Name: choice.Name,
  113. Price: choice.Price,
  114. })
  115. }
  116. }
  117. return ret
  118. }
  119. type ItemUrlScrapReq struct {
  120. ItemUrl string
  121. }
  122. func ItemUrlScrap(c echo.Context) error {
  123. v := c.Get("receiver").(ItemUrlScrapReq)
  124. body, err := util.Get(v.ItemUrl)
  125. if err != nil {
  126. return err
  127. }
  128. itemInfo, err := parse(v.ItemUrl, body)
  129. if err != nil {
  130. return c.String(604, "ertvwerawqfd-ItemUrl Parse failed: "+err.Error())
  131. }
  132. ret, _ := json.MarshalIndent(itemInfo, "", "\t")
  133. // fmt.Println(string(data))
  134. // ret, _ := json.Marshal(itemInfo)
  135. return c.JSONBlob(http.StatusOK, ret)
  136. }
  137. var regexpTitle *regexp.Regexp
  138. func init() {
  139. regexpTitle, _ = regexp.Compile("<title>(.*)</title>")
  140. }
  141. func getTitle(body string) string {
  142. ss := regexpTitle.FindAllStringSubmatch(body, 1)
  143. if len(ss) == 1 {
  144. return ss[0][1]
  145. }
  146. return ""
  147. }
  148. func parse(uri, body string) (ret model.ItemInfo, err error) {
  149. t, theme, err := getSolutionType(body)
  150. if err != nil {
  151. return
  152. }
  153. ret = parseSolution(t, theme, uri, body)
  154. return
  155. }
  156. func parseSolution(t model.SolutionType, theme, uri, body string) (ret model.ItemInfo) {
  157. ret.SolutionName = t
  158. u, err := url.Parse(uri)
  159. if err != nil {
  160. return
  161. }
  162. ret.DomainName = u.Host
  163. ret.DomainURI = uri
  164. if t == model.SolutionTypeWooCommerce {
  165. wordpress.Parse(body, &ret)
  166. } else if t == model.SolutionTypeDabory {
  167. dabory.Parse(body, &ret)
  168. } else if t == model.SolutionTypeShopify {
  169. shopify.Parse(body, &ret)
  170. } else if t == model.SolutionTypeMagento {
  171. magento.Parse(body, &ret)
  172. } else if t == model.SolutionTypeCafe24 {
  173. cafe24.Parse(body, &ret)
  174. } else if t == model.SolutionTypeGodo {
  175. godo.Parse(body, &ret)
  176. } else if t == model.SolutionTypeYoung {
  177. young.Parse(body, &ret)
  178. } else if t == model.SolutionTypeOthers {
  179. magento.Parse(body, &ret)
  180. }
  181. return
  182. }
  183. func getSolutionType(body string) (t model.SolutionType, theme string, reterr error) {
  184. if strings.Contains(body, "window.CAFE24") {
  185. t = model.SolutionTypeCafe24
  186. } else if strings.Contains(body, "dbrshop") {
  187. t = model.SolutionTypeDabory
  188. } else if strings.Contains(body, "/wp-content/") {
  189. t = model.SolutionTypeWooCommerce
  190. } else if strings.Contains(body, "고도몰5") {
  191. t = model.SolutionTypeGodo
  192. } else if strings.Contains(body, "cdn.shopify.com") {
  193. t = model.SolutionTypeShopify
  194. } else if strings.Contains(body, "magento") {
  195. t = model.SolutionTypeMagento
  196. } else if strings.Contains(body, "it_id=") {
  197. t = model.SolutionTypeYoung
  198. } else {
  199. t = model.SolutionTypeOthers
  200. //reterr = errors.New("no found solution type")
  201. }
  202. theme = ""
  203. re := regexp.MustCompile(`wp-content\/themes\/(.+?)\/`)
  204. result := re.FindStringSubmatch(body)
  205. if len(result) > 1 {
  206. theme = result[1]
  207. } else {
  208. theme = "generic"
  209. }
  210. //ioutil.WriteFile(string(t) + ".html", []byte(body), 644)
  211. return
  212. }