// Package cafe24 fills a model.ItemInfo from the HTML of a product page.
//
// NOTE(review): judging by the package name the pages are presumably served by
// Cafe24 storefronts; the CSS selectors used below are the only page contract
// visible in this file.
package cafe24

import (
	"fmt"
	"strings"

	"github.com/PuerkitoBio/goquery"

	"kkscrap-go/controllers/scraper/common"
	util "kkscrap-go/controllers/scraper/util"
	"kkscrap-go/model"
)

// Parse reads the product page HTML in body and stores what it finds on item.
//
// It sets Language and Emails through the common helpers, copies the Open
// Graph / product <meta> values (description, title, image, price amount and
// currency) and fills Images, OriginDesc and TextDesc from the page markup.
//
// Images holds the ".ThumbImage" sources when the page has any. When it has
// none, the og:image values collected from the meta tags are kept instead of
// being thrown away.
//
// NOTE(review): body is parsed again inside each helper; the helpers keep
// their string-based signatures so other callers in the package still work.
func Parse(body string, item *model.ItemInfo) {
	item.Language = common.GetLanguage(body)

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
	util.CheckError(err)

	// s := doc.Find("meta[property=\"og:url\"]")
	// Only <meta> elements that follow a sibling <link> are inspected.
	doc.Find("link ~ meta").Each(func(_ int, meta *goquery.Selection) {
		content := meta.AttrOr("content", "")
		switch meta.AttrOr("property", "") {
		case "og:description":
			item.ShortDesc = content
		case "og:title":
			item.ItemName = content
		case "og:image":
			item.Images = append(item.Images, content)
		case "product:price:amount":
			item.SalesPrice = util.GetFloat32(content)
		case "product:price:currency":
			item.Currency = content
		}
	})

	// Thumbnails win when present; otherwise keep the og:image fallback.
	if thumbs := getImages(body); len(thumbs) > 0 {
		item.Images = thumbs
	}
	item.OriginDesc = getDesc(body)
	item.TextDesc = getTextDesc(body)
	item.Emails = common.GetEmails(body)
}

// getImages returns the src attribute of every ".ThumbImage" element in body.
//
// Sources that already carry an "http:" or "https:" scheme are returned
// untouched. Anything else (typically a protocol-relative "//host/path") gets
// "https:" prepended. Elements with a missing or empty src are skipped.
func getImages(body string) (images []string) {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
	util.CheckError(err)

	doc.Find(".ThumbImage").Each(func(_ int, img *goquery.Selection) {
		src, ok := img.Attr("src")
		if !ok || src == "" {
			return
		}
		if strings.HasPrefix(src, "https:") || strings.HasPrefix(src, "http:") {
			images = append(images, src)
			return
		}
		images = append(images, fmt.Sprintf("https:%v", src))
	})
	return images
}

// getDesc returns the text content of each "div.xans-product #prdInfo" element
// that does not have the "menu" class, joined with newlines. It returns the
// empty string when nothing matches.
func getDesc(body string) string {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
	util.CheckError(err)

	var parts []string
	doc.Find("div.xans-product #prdInfo").Each(func(_ int, sel *goquery.Selection) {
		// log.Println(selection.Html())
		if !sel.HasClass("menu") {
			parts = append(parts, sel.Text())
		}
	})
	return strings.Join(parts, "\n")
}

// getTextDesc returns the inner HTML of the first element matching
// "div.xans-product #prdDetail span p", or the empty string when nothing
// matches. Despite the name, the result is markup, not plain text.
func getTextDesc(body string) string {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
	util.CheckError(err)

	inner, err := doc.Find("div.xans-product #prdDetail span p").Html()
	// Report a rendering failure the same way parse failures are reported.
	util.CheckError(err)
	return inner
}