parser.go 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. package cafe24
  2. import (
  3. "fmt"
  4. "kkscrap-go/controllers/scraper/common"
  5. util "kkscrap-go/controllers/scraper/util"
  6. "kkscrap-go/model"
  7. "strings"
  8. "github.com/PuerkitoBio/goquery"
  9. )
  10. func Parse(body string, item *model.ItemInfo) {
  11. item.Language = common.GetLanguage(body)
  12. doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
  13. util.CheckError(err)
  14. //s := doc.Find("meta[property=\"og:url\"]")
  15. doc.Find("link ~ meta").Each(func(i int, s *goquery.Selection) {
  16. v, _ := s.Attr("property")
  17. if v == "og:description" {
  18. item.ShortDesc = s.AttrOr("content", "")
  19. } else if v == "og:title" {
  20. item.ItemName = s.AttrOr("content", "")
  21. } else if v == "og:image" {
  22. item.Images = append(item.Images, s.AttrOr("content", ""))
  23. } else if v == "product:price:amount" {
  24. item.SalesPrice = util.GetFloat32(s.AttrOr("content", ""))
  25. } else if v == "product:price:currency" {
  26. item.Currency = s.AttrOr("content", "")
  27. }
  28. })
  29. item.Images = getImages(body)
  30. item.OriginDesc = getDesc(body)
  31. item.TextDesc = getTextDesc(body)
  32. item.Emails = common.GetEmails(body)
  33. return
  34. }
  35. func getImages(body string) (images []string) {
  36. doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
  37. util.CheckError(err)
  38. s := doc.Find(".ThumbImage")
  39. s.Each(func(i int, selection *goquery.Selection) {
  40. if src, existed := selection.Attr("src"); existed {
  41. if strings.HasPrefix(src, "https:") {
  42. images = append(images, src)
  43. } else {
  44. images = append(images, fmt.Sprintf("https:%v", src))
  45. }
  46. }
  47. })
  48. return
  49. }
  50. func getDesc(body string) string {
  51. doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
  52. html := []string{}
  53. util.CheckError(err)
  54. s := doc.Find("div.xans-product #prdInfo")
  55. s.Each(func(i int, selection *goquery.Selection) {
  56. // log.Println(selection.Html())
  57. if !selection.HasClass("menu") {
  58. html = append(html, selection.Text())
  59. }
  60. })
  61. return strings.Join(html, "/n")
  62. }
  63. func getTextDesc(body string) string {
  64. doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
  65. util.CheckError(err)
  66. text := doc.Find("div.xans-product #prdDetail span p")
  67. html, _ := text.Html()
  68. return html
  69. }