parser.go 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. package godo
  2. import (
  3. "kkscrap-go/controllers/scraper/common"
  4. "kkscrap-go/model"
  5. "regexp"
  6. "strings"
  7. util "kkscrap-go/controllers/scraper/util"
  8. "github.com/PuerkitoBio/goquery"
  9. )
  10. func Parse(body string, item *model.ItemInfo) {
  11. item.Language = getLanguage(body)
  12. doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
  13. util.CheckError(err)
  14. doc.Find("meta").Each(func(i int, s *goquery.Selection) {
  15. v, _ := s.Attr("property")
  16. if v == "og:title" {
  17. item.ItemName = s.AttrOr("content", "")
  18. } else if v == "og:image" {
  19. item.Images = append(item.Images, s.AttrOr("content", ""))
  20. } else if v == "product:price:amount" {
  21. item.SalesPrice = util.GetFloat32(s.AttrOr("content", ""))
  22. } else if v == "product:price:currency" {
  23. item.Currency = s.AttrOr("content", "")
  24. }
  25. })
  26. item.OriginDesc, _ = doc.Find("div.detail_cont").Html()
  27. cats := doc.Find("div.location_tit").Text()
  28. if strings.Contains(cats, ",") {
  29. item.ItemCategory = strings.Split(cats, ",")
  30. } else if strings.Contains(cats, "/") {
  31. item.ItemCategory = strings.Split(cats, "/")
  32. }
  33. item.DeliveryPrice = util.GetPrice(doc.Find("dl.item_delivery>dd>strong").Text())
  34. doc.Find("select.chosen-select").Each(func(i int, selection *goquery.Selection) {
  35. opt := model.Option{}
  36. selection.Find("option").Each(func(i int, selection *goquery.Selection) {
  37. opt.Choices = append(opt.Choices, model.Choice{Name: selection.Text()})
  38. })
  39. item.Options = append(item.Options, opt)
  40. })
  41. doc.Find("li>a>img.middle").Each(func(i int, selection *goquery.Selection) {
  42. img, ok := selection.Attr("src")
  43. if ok {
  44. item.Images = append(item.Images, img)
  45. }
  46. })
  47. item.SalesPrice = getPrice(body)
  48. item.Currency = getCurrency(body)
  49. item.Emails = common.GetEmails(body)
  50. return
  51. }
  // gdLocaleRe captures the single-quoted value assigned to the gdLocale
  // variable in the page's inline script. The capture stops at the first
  // closing quote so later statements on the same line are not swallowed.
  var gdLocaleRe = regexp.MustCompile(`var gdLocale = '([^']*)';`)

  // getLanguage returns the value assigned to the gdLocale script variable in
  // body, or "" when body contains no such assignment.
  func getLanguage(body string) string {
  	m := gdLocaleRe.FindStringSubmatch(body)
  	if m == nil {
  		// No match: FindStringSubmatch returns nil, so m[1] would panic.
  		return ""
  	}
  	return m[1]
  }
  // gdCurrencyCodeRe captures the single-quoted value assigned to the
  // gdCurrencyCode variable in the page's inline script. The capture stops at
  // the first closing quote so later statements on the same line are not
  // swallowed.
  var gdCurrencyCodeRe = regexp.MustCompile(`var gdCurrencyCode = '([^']*)';`)

  // getCurrency returns the value assigned to the gdCurrencyCode script
  // variable in body, or "" when body contains no such assignment.
  func getCurrency(body string) string {
  	m := gdCurrencyCodeRe.FindStringSubmatch(body)
  	if m == nil {
  		// No match: FindStringSubmatch returns nil, so m[1] would panic.
  		return ""
  	}
  	return m[1]
  }
  62. func getPrice(body string) float32 {
  63. reg, _ := regexp.Compile("'setGoodsPrice'[ ]*:[ ]*'(.*)'")
  64. ss := reg.FindStringSubmatch(body)
  65. return util.GetFloat32(ss[1])
  66. }