tct-main.go 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. package controllers_scraper
  2. import (
  3. "encoding/json"
  4. "kkscrap-go/controllers/scraper/cafe24"
  5. "kkscrap-go/controllers/scraper/godo"
  6. "kkscrap-go/controllers/scraper/magento"
  7. "kkscrap-go/controllers/scraper/shopify"
  8. "kkscrap-go/controllers/scraper/wordpress"
  9. "kkscrap-go/controllers/scraper/young"
  10. "kkscrap-go/model"
  11. "net/http"
  12. "net/url"
  13. "regexp"
  14. "strings"
  15. util "kkscrap-go/controllers/scraper/util"
  16. // "golang.org/x/crypto/bcrypt"
  17. "github.com/labstack/echo"
  18. )
  19. type ItemUrlScrapReq struct {
  20. ItemUrl string
  21. }
  22. func ItemUrlScrap(c echo.Context) error {
  23. v := c.Get("receiver").(ItemUrlScrapReq)
  24. // retv := &struct {
  25. // model.ItemInfo
  26. // }{}
  27. itemInfo, err := parse(v.ItemUrl)
  28. if err != nil {
  29. return c.String(604, "ertvwerawqfd-ItemUrl Parse failed: "+err.Error())
  30. }
  31. ret, _ := json.MarshalIndent(itemInfo, "", "\t")
  32. // fmt.Println(string(data))
  33. // ret, _ := json.Marshal(itemInfo)
  34. return c.JSONBlob(http.StatusOK, ret)
  35. }
  36. var regexpTitle *regexp.Regexp
  37. func init() {
  38. regexpTitle, _ = regexp.Compile("<title>(.*)</title>")
  39. }
  40. func getTitle(body string) string {
  41. ss := regexpTitle.FindAllStringSubmatch(body, 1)
  42. if len(ss) == 1 {
  43. return ss[0][1]
  44. }
  45. return ""
  46. }
  47. func parse(uri string) (ret model.ItemInfo, err error) {
  48. t, err := getSolutionType(uri)
  49. if err != nil {
  50. return
  51. }
  52. ret = parseSolution(t, uri)
  53. return
  54. }
  55. func parseSolution(t model.SolutionType, uri string) (ret model.ItemInfo) {
  56. ret.SolutionName = t
  57. u, err := url.Parse(uri)
  58. if err != nil {
  59. return
  60. }
  61. ret.DomainName = u.Host
  62. ret.DomainURI = uri
  63. if t == model.SolutionTypeWooCommerce {
  64. wordpress.Parse(uri, &ret)
  65. } else if t == model.SolutionTypeShopify {
  66. shopify.Parse(uri, &ret)
  67. } else if t == model.SolutionTypeMagento {
  68. magento.Parse(uri, &ret)
  69. } else if t == model.SolutionTypeCafe24 {
  70. cafe24.Parse(uri, &ret)
  71. } else if t == model.SolutionTypeGodo {
  72. godo.Parse(uri, &ret)
  73. } else if t == model.SolutionTypeYoung {
  74. young.Parse(uri, &ret)
  75. } else if t == model.SolutionTypeOthers {
  76. magento.Parse(uri, &ret)
  77. }
  78. return
  79. }
  80. func getSolutionType(url string) (t model.SolutionType, reterr error) {
  81. body, err := util.Get(url)
  82. if err != nil {
  83. reterr = err
  84. return
  85. }
  86. if strings.Contains(body, "window.CAFE24") {
  87. t = model.SolutionTypeCafe24
  88. } else if strings.Contains(body, "woocommerce-page") {
  89. t = model.SolutionTypeWooCommerce
  90. } else if strings.Contains(body, "고도몰5") {
  91. t = model.SolutionTypeGodo
  92. } else if strings.Contains(body, "cdn.shopify.com") {
  93. t = model.SolutionTypeShopify
  94. } else if strings.Contains(body, "magento") {
  95. t = model.SolutionTypeMagento
  96. } else if strings.Contains(body, "it_id=") {
  97. t = model.SolutionTypeYoung
  98. } else {
  99. t = model.SolutionTypeOthers
  100. //reterr = errors.New("no found solution type")
  101. }
  102. //ioutil.WriteFile(string(t) + ".html", []byte(body), 644)
  103. return
  104. }