123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081 |
- package cafe24
- import (
- "fmt"
- "kkscrap-go/controllers/scraper/common"
- util "kkscrap-go/controllers/scraper/util"
- "kkscrap-go/model"
- "strings"
- "github.com/PuerkitoBio/goquery"
- )
- func Parse(body string, item *model.ItemInfo) {
- item.Language = common.GetLanguage(body)
- doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
- util.CheckError(err)
- //s := doc.Find("meta[property=\"og:url\"]")
- doc.Find("link ~ meta").Each(func(i int, s *goquery.Selection) {
- v, _ := s.Attr("property")
- if v == "og:description" {
- item.ShortDesc = s.AttrOr("content", "")
- } else if v == "og:title" {
- item.ItemName = s.AttrOr("content", "")
- } else if v == "og:image" {
- item.Images = append(item.Images, s.AttrOr("content", ""))
- } else if v == "product:price:amount" {
- item.SalesPrice = util.GetFloat32(s.AttrOr("content", ""))
- } else if v == "product:price:currency" {
- item.Currency = s.AttrOr("content", "")
- }
- })
- item.Images = getImages(body)
- item.OriginDesc = getDesc(body)
- item.TextDesc = getTextDesc(body)
- item.Emails = common.GetEmails(body)
- return
- }
- func getImages(body string) (images []string) {
- doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
- util.CheckError(err)
- s := doc.Find(".ThumbImage")
- s.Each(func(i int, selection *goquery.Selection) {
- if src, existed := selection.Attr("src"); existed {
- if strings.HasPrefix(src, "https:") {
- images = append(images, src)
- } else {
- images = append(images, fmt.Sprintf("https:%v", src))
- }
- }
- })
- return
- }
- func getDesc(body string) string {
- doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
- html := []string{}
- util.CheckError(err)
- s := doc.Find("div.xans-product #prdInfo")
- s.Each(func(i int, selection *goquery.Selection) {
- // log.Println(selection.Html())
- if !selection.HasClass("menu") {
- html = append(html, selection.Text())
- }
- })
- return strings.Join(html, "/n")
- }
- func getTextDesc(body string) string {
- doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
- util.CheckError(err)
- text := doc.Find("div.xans-product #prdDetail span p")
- html, _ := text.Html()
- return html
- }
|