package main import ( "fmt" "log" "net/http" "regexp" "github.com/PuerkitoBio/goquery" ) func main() { // Wordpress url := "https://bt-beloria-1.myshopify.com/collections/women-collection/products/sweater-classical-tshirt" // url := "https://seoulknit.com/shop/v-neck-summer-pullover/" // url = "https://www.elcanto.co.kr" //MakeShop // url = "https://www.ippngirl.co.kr" //MakeShop // url = "https://ssline.kr" //cafe24 // url = "https://koskomro.com" //cafe24 // url = "https://bt-beloria-1.myshopify.com" //shopify // url = "https://lachinatakorea.com" //Godomall // url = "https://sf-fd.com" //Godomall // url = "https://www.vanillagift.com" //Magento // url = "https://taiwan.coach.com" //Magento // url = "http://mas1.magikthemes.com" //Magento // url = "https://aladinmarket.co.kr" //young Cart // url = "http://damoagift.com" //young Cart // url = "https://p2u.daboryhost.com" /DaboryShop // url = "https://seoulknit.com" //Woocommerce // url = "http://webhost.dabory.com/" /Woocommerce // url = "https://addand.kr" // WooCommerce // url := "https://droppii.net.vn/cnd-ginseng-gold" // Send an HTTP GET request to the URL response, err := http.Get(url) if err != nil { log.Fatal(err) } defer response.Body.Close() doc, err := goquery.NewDocumentFromReader(response.Body) if err != nil { log.Fatal(err) } // These will the value of PDP parsing structures cItemName := ".product-single__title" cShortDesc := ".product-single__description" cItemCategory := ".product-single__sub-info:nth-of-type(1)" cItemTags := ".product-single__sub-info:nth-of-type(2)" cItemImages := ".pg__thumb a" cItemTextDesc := ".product-single__description" // cItemOptions := "select#rating option" cItemPrice := ".product-price-wrap>.product-price>.product-price__price" cEmail := "footer" cAddress := ".elementor-widget-container > p" cRating := ".pg__review-stars .spr-icon-star" cAuthor := ".product-brand a" cVideo := "iframe" fmt.Println("ItemName: ", doc.Find(cItemName).First().Text()) fmt.Println("ShortDesc: ", doc.Find(cShortDesc).First().Text()) fmt.Println("ItemCategory: ", doc.Find(cItemCategory).First().Text()) fmt.Println("ItemTags: ", doc.Find(cItemTags).First().Text()) fmt.Println("TextDesc: ", doc.Find(cItemTextDesc).First().Text()) fmt.Println("ItemPice: ", doc.Find(cItemPrice).First().Text()) fmt.Println("Email: ", findEmail(doc.Find(cEmail).First().Text(), "")) fmt.Println("Address: ", doc.Find(cAddress).First().Text()) fmt.Println("Rating : ", doc.Find(cRating).First().Text()) // doc.Find("link ~ meta").Each(func(i int, s *goquery.Selection) { // v, _ := s.Attr("property") // fmt.Println(v, " : ", s.AttrOr("content", "")) // }) fmt.Println("Author : ") doc.Find(cAuthor).Each(func(index int, element *goquery.Selection) { link, _ := element.Attr("href") fmt.Println("====================") fmt.Println("AuthorName: ", element.Text()) fmt.Printf("AuthorLink: %s\n", link) }) vdos := []string{} doc.Find(cVideo).Each(func(index int, element *goquery.Selection) { embed, _ := element.Attr("src") vdos = append(vdos, embed) }) fmt.Printf("ItemVideos: %s\n", vdos) imgs := []string{} fmt.Println("ItemImages: ") doc.Find(cItemImages).Each(func(index int, element *goquery.Selection) { img, _ := element.Attr("href") imgs = append(imgs, fmt.Sprintf("https://%v", img)) }) fmt.Println("", imgs) } func findEmail(body string, doms string) (emails []string) { r, _ := regexp.Compile(`[a-zA-Z0-9_+&*-]+(?:\.[a-zA-Z0-9_+&*-]+)*@(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,24}`) emails = append(emails, r.FindStringSubmatch(body)...) return }