123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123 |
- package controllers_scraper
- import (
- "encoding/json"
- "kkscrap-go/controllers/scraper/cafe24"
- "kkscrap-go/controllers/scraper/godo"
- "kkscrap-go/controllers/scraper/magento"
- "kkscrap-go/controllers/scraper/shopify"
- "kkscrap-go/controllers/scraper/wordpress"
- "kkscrap-go/controllers/scraper/young"
- "kkscrap-go/model"
- "net/http"
- "net/url"
- "regexp"
- "strings"
- util "kkscrap-go/controllers/scraper/util"
- // "golang.org/x/crypto/bcrypt"
- "github.com/labstack/echo"
- )
// ItemUrlScrapReq is the request payload consumed by ItemUrlScrap.
type ItemUrlScrapReq struct {
	// ItemUrl is the address of the item page to scrape; it is handed to
	// parse unchanged.
	ItemUrl string
}
- func ItemUrlScrap(c echo.Context) error {
- v := c.Get("receiver").(ItemUrlScrapReq)
- // retv := &struct {
- // model.ItemInfo
- // }{}
- itemInfo, err := parse(v.ItemUrl)
- if err != nil {
- return c.String(604, "ertvwerawqfd-ItemUrl Parse failed: "+err.Error())
- }
- ret, _ := json.MarshalIndent(itemInfo, "", "\t")
- // fmt.Println(string(data))
- // ret, _ := json.Marshal(itemInfo)
- return c.JSONBlob(http.StatusOK, ret)
- }
// regexpTitle captures the text between a <title> tag and a </title> tag.
// It is compiled once at package initialisation; MustCompile is used instead
// of Compile-in-init so that an invalid pattern fails loudly at start-up
// rather than leaving a nil *regexp.Regexp behind.
var regexpTitle = regexp.MustCompile("<title>(.*)</title>")
- func getTitle(body string) string {
- ss := regexpTitle.FindAllStringSubmatch(body, 1)
- if len(ss) == 1 {
- return ss[0][1]
- }
- return ""
- }
- func parse(uri string) (ret model.ItemInfo, err error) {
- t, err := getSolutionType(uri)
- if err != nil {
- return
- }
- ret = parseSolution(t, uri)
- return
- }
- func parseSolution(t model.SolutionType, uri string) (ret model.ItemInfo) {
- ret.SolutionName = t
- u, err := url.Parse(uri)
- if err != nil {
- return
- }
- ret.DomainName = u.Host
- ret.DomainURI = uri
- if t == model.SolutionTypeWooCommerce {
- wordpress.Parse(uri, &ret)
- } else if t == model.SolutionTypeShopify {
- shopify.Parse(uri, &ret)
- } else if t == model.SolutionTypeMagento {
- magento.Parse(uri, &ret)
- } else if t == model.SolutionTypeCafe24 {
- cafe24.Parse(uri, &ret)
- } else if t == model.SolutionTypeGodo {
- godo.Parse(uri, &ret)
- } else if t == model.SolutionTypeYoung {
- young.Parse(uri, &ret)
- } else if t == model.SolutionTypeOthers {
- magento.Parse(uri, &ret)
- }
- return
- }
- func getSolutionType(url string) (t model.SolutionType, reterr error) {
- body, err := util.Get(url)
- if err != nil {
- reterr = err
- return
- }
- if strings.Contains(body, "window.CAFE24") {
- t = model.SolutionTypeCafe24
- } else if strings.Contains(body, "woocommerce-page") {
- t = model.SolutionTypeWooCommerce
- } else if strings.Contains(body, "고도몰5") {
- t = model.SolutionTypeGodo
- } else if strings.Contains(body, "cdn.shopify.com") {
- t = model.SolutionTypeShopify
- } else if strings.Contains(body, "magento") {
- t = model.SolutionTypeMagento
- } else if strings.Contains(body, "it_id=") {
- t = model.SolutionTypeYoung
- } else {
- t = model.SolutionTypeOthers
- //reterr = errors.New("no found solution type")
- }
- //ioutil.WriteFile(string(t) + ".html", []byte(body), 644)
- return
- }
|