@@ -0,0 +1,92 @@
+package main
+import (
+ "fmt"
+ "log"
+ "net/http"
+ "regexp"
+ "github.com/PuerkitoBio/goquery"
+func main() {
+ // Wordpress
+ // url := "https://addand.kr/shop/new-%ed%95%9c-%ea%b6%8c%ec%9c%bc%eb%a1%9c-%eb%81%9d%eb%82%98%eb%8a%94-%eb%85%b8%ec%85%98/"
+ url := "https://droppii.net.vn/cnd-ginseng-gold"
+ // Send an HTTP GET request to the URL
+ response, err := http.Get(url)
+ if err != nil {
+ log.Fatal(err)
+ }
+ defer response.Body.Close()
+ doc, err := goquery.NewDocumentFromReader(response.Body)
+ if err != nil {
+ log.Fatal(err)
+ }
+ // These will the value of PDP parsing structures
+ cItemName := ".product_title"
+ cShortDesc := ".woocommerce-product-details__short-description"
+ cItemCategory := ".single-breadcrumbs-wrapper"
+ cItemTags := ".tagged_as"
+ cItemImages := ".woocommerce-product-gallery__image a"
+ cItemTextDesc := "div.product-tabs-wrapper"
+ // cItemOptions := "select#rating option"
+ cItemPrice := ".summary>.summary-inner>.price .woocommerce-Price-amount.amount"
+ cEmail := "footer"
+ cAddress := ".elementor-widget-container > p"
+ cRating := ".star-rating .rating"
+ cAuthor := ".product-brand a"
+ cVideo := "iframe"
+ fmt.Println("ItemName: ", doc.Find(cItemName).First().Text())
+ fmt.Println("ShortDesc: ", doc.Find(cShortDesc).First().Text())
+ fmt.Println("ItemCategory: ", doc.Find(cItemCategory).First().Text())
+ fmt.Println("ItemTags: ", doc.Find(cItemTags).First().Text())
+ fmt.Println("TextDesc: ", doc.Find(cItemTextDesc).First().Text())
+ fmt.Println("ItemPice: ", doc.Find(cItemPrice).First().Text())
+ fmt.Println("Email: ", findEmail(doc.Find(cEmail).First().Text(), ""))
+ fmt.Println("Address: ", doc.Find(cAddress).First().Text())
+ fmt.Println("Rating : ", doc.Find(cRating).First().Text())
+ // doc.Find("link ~ meta").Each(func(i int, s *goquery.Selection) {
+ // v, _ := s.Attr("property")
+ // fmt.Println(v, " : ", s.AttrOr("content", ""))
+ // })
+ fmt.Println("Author : ")
+ doc.Find(cAuthor).Each(func(index int, element *goquery.Selection) {
+ link, _ := element.Attr("href")
+ fmt.Println("====================")
+ fmt.Println("AuthorName: ", element.Text())
+ fmt.Printf("AuthorLink: %s\n", link)
+ })
+ vdos := []string{}
+ doc.Find(cVideo).Each(func(index int, element *goquery.Selection) {
+ embed, _ := element.Attr("src")
+ vdos = append(vdos, embed)
+ })
+ fmt.Printf("ItemVideos: %s\n", vdos)
+ imgs := []string{}
+ fmt.Println("ItemImages: ")
+ doc.Find(cItemImages).Each(func(index int, element *goquery.Selection) {
+ img, _ := element.Attr("href")
+ imgs = append(imgs, img)
+ })
+ fmt.Println("", imgs)
// emailPattern matches e-mail-like tokens: a local part of letters, digits and
// "_+&*-" (optionally dot-separated), an "@", one or more dot-terminated
// domain labels, and a 2-24 letter top-level domain. It is compiled once at
// package initialisation rather than on every call.
var emailPattern = regexp.MustCompile(`[a-zA-Z0-9_+&*-]+(?:\.[a-zA-Z0-9_+&*-]+)*@(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,24}`)

// findEmail returns every e-mail address found in body, in order of
// appearance. It returns nil when body contains no address.
//
// doms is currently unused; it is kept so existing callers keep compiling.
func findEmail(body string, doms string) (emails []string) {
	// FindAllString with n = -1 yields all non-overlapping matches, not just
	// the first one.
	return emailPattern.FindAllString(body, -1)
}