package wordpress import ( "encoding/json" "fmt" "kkscrap-go/controllers/scraper/common" "kkscrap-go/model" "strings" util "kkscrap-go/controllers/scraper/util" "github.com/PuerkitoBio/goquery" ) func Parse(uri string, item *model.ItemInfo) { body, err := util.Get(uri) util.CheckError(err) p := getProduct(body) item.Images = append(item.Images, p.Image) item.Images = append(item.Images, getImages(body)...) item.SalesPrice = util.GetFloat32(p.Offers[0].Price) item.Sku = fmt.Sprintf("%v", p.Sku) item.ShortDesc = p.Description item.OriginDesc = getDesc(body) item.TextDesc = getTextDesc(body) item.ItemName = p.Name item.Currency = p.Offers[0].Pricecurrency item.Language = common.GetLanguage(body) item.Emails = common.GetEmails(body) item.Options = append(item.Options, getOptions(body)) return } func getDesc(body string) string { doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)) util.CheckError(err) s := doc.Find("div.woocommerce-tabs") //s.Each(func(i int, selection *goquery.Selection) { // log.Println(selection.Html()) //}) html, err := s.Html() if err != nil { return "" } return html } func getTextDesc(body string) string { doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)) util.CheckError(err) text := doc.Find("div.woocommerce-tabs").Text() return text } func getProduct(body string) (ret WordPressProduct) { jsonStr := getJson(body) if idx := strings.Index(jsonStr, "@graph"); idx >= 0 { p := WordPressItem{} json.Unmarshal([]byte(jsonStr), &p) ret = p.Graph[1] } else { json.Unmarshal([]byte(jsonStr), &ret) } return } func getJson(body string) string { idx := strings.Index(body, "") body = body[:idx] return body } func getImages(body string) (ret []string) { doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)) util.CheckError(err) //doc.Find("img.wp-post-image").Each(func(i int, s *goquery.Selection) { // if src, ok := s.Attr("src"); ok { // width, _ := s.Attr("width") // height, _ := s.Attr("height") // w, _ := strconv.ParseInt(width, 10, 64) // h, _ := strconv.ParseInt(height, 10, 64) // //log.Println(src, width, height) // ret = append(ret, model.Image{ // Path: src, // Width: int(w), // Height: int(h), // }) // } //}) //sort.Slice(ret, func(i, j int) bool { // return ret[i].Width*ret[i].Height > ret[j].Width*ret[j].Height //}) //ret = ret[:1] doc.Find("figure.woocommerce-product-gallery__wrapper div").Each(func(i int, s *goquery.Selection) { if src, ok := s.Attr("data-thumb"); ok { ret = append(ret, src) } }) return } func getPrice(body string) (ret string) { doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)) util.CheckError(err) s := doc.Find("span.woocommerce-Price-amount.amount") s.Each(func(i int, selection *goquery.Selection) { if i == 0 { ret = selection.Text() } }) //log.Println(s.Nodes[0].FirstChild.FirstChild.Data) //log.Println(s.Nodes[0].FirstChild.NextSibling.Data) return } func getCategories(body string) (ret string) { doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)) util.CheckError(err) s := doc.Find("span.posted_in") ret = s.Text() idx := strings.Index(ret, ":") if idx > 0 { ret = strings.TrimSpace(ret[idx+1:]) } //log.Println(s.Nodes[0].FirstChild.FirstChild.Data) //log.Println(s.Nodes[0].FirstChild.NextSibling.Data) return } func getOptions(body string) (ret model.Option) { doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)) util.CheckError(err) s := doc.Find("body select") s.Each(func(i int, selection *goquery.Selection) { skippedFistOption := false v, _ := selection.Attr("name") if v != "rating" { selection.Find("option").Each(func(i int, selection *goquery.Selection) { if skippedFistOption { ret.Choices = append(ret.Choices, model.Choice{ Name: selection.Text(), }) } else { skippedFistOption = true } }) } }) return }