package wordpress import ( "encoding/json" "fmt" "github.com/PuerkitoBio/goquery" "kkscrap-go/controllers/scraper/common" "kkscrap-go/controllers/scraper/util" "kkscrap-go/model" "regexp" "strings" ) func Parse(body string, item *model.ItemInfo) { p := getProduct(body) if p.Image != "" { item.Images = append(item.Images, p.Image) } item.Images = append(item.Images, getImages(body)...) if p.Offers != nil && len(p.Offers) > 0 { item.SalesPrice = util.GetFloat32(p.Offers[0].Price) item.Currency = p.Offers[0].Pricecurrency } item.Sku = fmt.Sprintf("%v", p.Sku) item.ShortDesc = p.Description item.OriginDesc = getProductDescription(body) item.TextDesc = getTextDesc(body) item.ItemName = p.Name item.Language = common.GetLanguage(body) item.Emails = common.GetEmails(body) item.Options = append(item.Options, getOptions(body)) return } func getProductDescription(body string) string { doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)) util.CheckError(err) s := doc.Find("div.woocommerce-Tabs-panel--description") //s.Each(func(i int, selection *goquery.Selection) { // log.Println(selection.Html()) //}) html, err := s.Html() if err != nil { return "" } return html } func getTextDesc(body string) string { doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)) util.CheckError(err) text := doc.Find("div.woocommerce-tabs").Text() return text } func getProduct(body string) (ret WordPressProduct) { jsonStr := getJson(body) if idx := strings.Index(jsonStr, "@graph"); idx >= 0 { p := WordPressItem{} json.Unmarshal([]byte(jsonStr), &p) ret = p.Graph[1] } else { json.Unmarshal([]byte(jsonStr), &ret) } return } func getJson(body string) string { idx := strings.Index(body, "") body = body[:idx] return body } func getImages(body string) (ret []string) { doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)) util.CheckError(err) //doc.Find("img.wp-post-image").Each(func(i int, s *goquery.Selection) { // if src, ok := s.Attr("src"); ok { // width, _ := s.Attr("width") // height, _ := s.Attr("height") // w, _ := strconv.ParseInt(width, 10, 64) // h, _ := strconv.ParseInt(height, 10, 64) // //log.Println(src, width, height) // ret = append(ret, model.Image{ // Path: src, // Width: int(w), // Height: int(h), // }) // } //}) //sort.Slice(ret, func(i, j int) bool { // return ret[i].Width*ret[i].Height > ret[j].Width*ret[j].Height //}) //ret = ret[:1] re := regexp.MustCompile(`-\d+x\d+\.`) doc.Find("figure.woocommerce-product-gallery__wrapper div").Each(func(i int, s *goquery.Selection) { if src, ok := s.Attr("data-thumb"); ok { newImgUrl := re.ReplaceAllString(src, ".") ret = append(ret, newImgUrl) } }) return } func getPrice(body string) (ret string) { doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)) util.CheckError(err) s := doc.Find("span.woocommerce-Price-amount.amount") s.Each(func(i int, selection *goquery.Selection) { if i == 0 { ret = selection.Text() } }) //log.Println(s.Nodes[0].FirstChild.FirstChild.Data) //log.Println(s.Nodes[0].FirstChild.NextSibling.Data) return } func getCategories(body string) (ret string) { doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)) util.CheckError(err) s := doc.Find("span.posted_in") ret = s.Text() idx := strings.Index(ret, ":") if idx > 0 { ret = strings.TrimSpace(ret[idx+1:]) } //log.Println(s.Nodes[0].FirstChild.FirstChild.Data) //log.Println(s.Nodes[0].FirstChild.NextSibling.Data) return } func getOptions(body string) (ret model.Option) { doc, err := goquery.NewDocumentFromReader(strings.NewReader(body)) util.CheckError(err) s := doc.Find("body select") s.Each(func(i int, selection *goquery.Selection) { skippedFistOption := false v, _ := selection.Attr("name") if v != "rating" { selection.Find("option").Each(func(i int, selection *goquery.Selection) { if skippedFistOption { ret.Choices = append(ret.Choices, model.Choice{ Name: selection.Text(), }) } else { skippedFistOption = true } }) } }) return }