parser.go 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. package godo
  2. import (
  3. "kkscrap-go/controllers/scraper/common"
  4. "kkscrap-go/model"
  5. "regexp"
  6. "strings"
  7. util "kkscrap-go/controllers/scraper/util"
  8. "github.com/PuerkitoBio/goquery"
  9. )
  10. func Parse(body string, item *model.ItemInfo) {
  11. item.Language = getLanguage(body)
  12. doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
  13. util.CheckError(err)
  14. doc.Find("meta").Each(func(i int, s *goquery.Selection) {
  15. v, _ := s.Attr("property")
  16. if v == "og:title" {
  17. item.ItemName = s.AttrOr("content", "")
  18. } else if v == "og:image" {
  19. item.Images = append(item.Images, s.AttrOr("content", ""))
  20. } else if v == "product:price:amount" {
  21. item.SalesPrice = util.GetFloat32(s.AttrOr("content", ""))
  22. } else if v == "product:price:currency" {
  23. item.Currency = s.AttrOr("content", "")
  24. }
  25. })
  26. item.OriginDesc, _ = doc.Find("div.detail_cont").Html()
  27. item.ItemCategory = doc.Find("div.location_tit").Text()
  28. item.DeliveryPrice = util.GetPrice(doc.Find("dl.item_delivery>dd>strong").Text())
  29. doc.Find("select.chosen-select").Each(func(i int, selection *goquery.Selection) {
  30. opt := model.Option{}
  31. selection.Find("option").Each(func(i int, selection *goquery.Selection) {
  32. opt.Choices = append(opt.Choices, model.Choice{Name: selection.Text()})
  33. })
  34. item.Options = append(item.Options, opt)
  35. })
  36. doc.Find("li>a>img.middle").Each(func(i int, selection *goquery.Selection) {
  37. img, ok := selection.Attr("src")
  38. if ok {
  39. item.Images = append(item.Images, img)
  40. }
  41. })
  42. item.SalesPrice = getPrice(body)
  43. item.Currency = getCurrency(body)
  44. item.Emails = common.GetEmails(body)
  45. return
  46. }
  // gdLocalePattern captures the single-quoted value assigned to the inline
  // script variable gdLocale. The capture stops at the first closing quote so
  // that other quoted values later on the same line are not swallowed.
  var gdLocalePattern = regexp.MustCompile(`var gdLocale = '([^']*)';`)

  // getLanguage returns the value assigned to the gdLocale script variable in
  // body, or "" when body contains no such assignment.
  func getLanguage(body string) string {
  	// FindStringSubmatch returns nil when nothing matches; indexing it
  	// unguarded would panic.
  	match := gdLocalePattern.FindStringSubmatch(body)
  	if match == nil {
  		return ""
  	}
  	return match[1]
  }
  // gdCurrencyCodePattern captures the single-quoted value assigned to the
  // inline script variable gdCurrencyCode. The capture stops at the first
  // closing quote so that other quoted values later on the same line are not
  // swallowed.
  var gdCurrencyCodePattern = regexp.MustCompile(`var gdCurrencyCode = '([^']*)';`)

  // getCurrency returns the value assigned to the gdCurrencyCode script
  // variable in body, or "" when body contains no such assignment.
  func getCurrency(body string) string {
  	// FindStringSubmatch returns nil when nothing matches; indexing it
  	// unguarded would panic.
  	match := gdCurrencyCodePattern.FindStringSubmatch(body)
  	if match == nil {
  		return ""
  	}
  	return match[1]
  }
  57. func getPrice(body string) float32 {
  58. reg, _ := regexp.Compile("'setGoodsPrice'[ ]*:[ ]*'(.*)'")
  59. ss := reg.FindStringSubmatch(body)
  60. return util.GetFloat32(ss[1])
  61. }