package common import ( "kkscrap-go/model" "regexp" "strings" util "kkscrap-go/controllers/scraper/util" "github.com/PuerkitoBio/goquery" ) func GetLanguage(body string) string { doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)) util.CheckError(err) ret, _ := doc.Find("html").Attr("lang") return ret } func GetEmails(body string) (ret []string) { reg := regexp.MustCompile("[a-zA-Z0-9.!#$%&*+\\-/=?^_`{|}~]+@[a-z0-9.\\-]+\\.[a-z]+") ss := reg.FindAllStringSubmatch(body, -1) m := make(map[string]bool) for _, s := range ss { e := s[0] if e == "support@crema.me" || e == "support@snapvi.co.kr" || strings.HasPrefix(e, "/") || strings.HasPrefix(e, "http://") || strings.HasPrefix(e, "https://") || strings.HasSuffix(e, ".png") || strings.HasSuffix(e, ".jpg") || strings.HasSuffix(e, ".js") { continue } m[s[0]] = true } for k, _ := range m { ret = append(ret, k) } return ret } func Parse(uri string, item *model.ItemInfo) { body, err := util.Get(uri) util.CheckError(err) item.Language = GetLanguage(body) doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)) util.CheckError(err) doc.Find("meta").Each(func(i int, s *goquery.Selection) { v, _ := s.Attr("property") if v == "og:description" { item.ShortDesc = s.AttrOr("content", "") } else if v == "og:title" { item.ItemName = s.AttrOr("content", "") } else if v == "og:image" { item.Images = append(item.Images, s.AttrOr("content", "")) } else if v == "product:price:amount" { item.SalesPrice = util.GetFloat32(s.AttrOr("content", "")) } else if v == "product:price:currency" { item.Currency = s.AttrOr("content", "") } }) return }