parser.go 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. package godo
  2. import (
  3. "kkscrap-go/controllers/scraper/common"
  4. "kkscrap-go/model"
  5. "regexp"
  6. "strings"
  7. util "kkscrap-go/controllers/scraper/util"
  8. "github.com/PuerkitoBio/goquery"
  9. )
  10. func Parse(uri string, item *model.ItemInfo) {
  11. body, err := util.Get(uri)
  12. util.CheckError(err)
  13. item.Language = getLanguage(body)
  14. doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
  15. util.CheckError(err)
  16. doc.Find("meta").Each(func(i int, s *goquery.Selection) {
  17. v, _ := s.Attr("property")
  18. if v == "og:title" {
  19. item.ItemName = s.AttrOr("content", "")
  20. } else if v == "og:image" {
  21. item.Images = append(item.Images, s.AttrOr("content", ""))
  22. } else if v == "product:price:amount" {
  23. item.SalesPrice = util.GetFloat32(s.AttrOr("content", ""))
  24. } else if v == "product:price:currency" {
  25. item.Currency = s.AttrOr("content", "")
  26. }
  27. })
  28. desc := doc.Find("#frmView > div > div > div.item_detail_tit > div").Text()
  29. if desc != "" {
  30. desc = strings.Split(desc, "\n")[0]
  31. }
  32. item.ShortDesc = desc
  33. item.SalesPrice = getPrice(body)
  34. item.Currency = getCurrency(body)
  35. item.Emails = common.GetEmails(body)
  36. return
  37. }
  38. func getLanguage(body string) string {
  39. reg, _ := regexp.Compile("var gdLocale = '(.*)';")
  40. ss := reg.FindStringSubmatch(body)
  41. return ss[1]
  42. }
  43. func getCurrency(body string) string {
  44. reg, _ := regexp.Compile("var gdCurrencyCode = '(.*)';")
  45. ss := reg.FindStringSubmatch(body)
  46. return ss[1]
  47. }
  48. func getPrice(body string) float32 {
  49. reg, _ := regexp.Compile("'setGoodsPrice'[ ]*:[ ]*'(.*)'")
  50. ss := reg.FindStringSubmatch(body)
  51. return util.GetFloat32(ss[1])
  52. }