123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108 |
- package main
- import (
- "fmt"
- "log"
- "net/http"
- "regexp"
- "github.com/PuerkitoBio/goquery"
- )
- func main() {
- // Wordpress
- url := "https://bt-beloria-1.myshopify.com/collections/women-collection/products/sweater-classical-tshirt"
- // url := "https://seoulknit.com/shop/v-neck-summer-pullover/"
- // url = "https://www.elcanto.co.kr" //MakeShop
- // url = "https://www.ippngirl.co.kr" //MakeShop
- // url = "https://ssline.kr" //cafe24
- // url = "https://koskomro.com" //cafe24
- // url = "https://bt-beloria-1.myshopify.com" //shopify
- // url = "https://lachinatakorea.com" //Godomall
- // url = "https://sf-fd.com" //Godomall
- // url = "https://www.vanillagift.com" //Magento
- // url = "https://taiwan.coach.com" //Magento
- // url = "http://mas1.magikthemes.com" //Magento
- // url = "https://aladinmarket.co.kr" //young Cart
- // url = "http://damoagift.com" //young Cart
- // url = "https://p2u.daboryhost.com" /DaboryShop
- // url = "https://seoulknit.com" //Woocommerce
- // url = "http://webhost.dabory.com/" /Woocommerce
- // url = "https://addand.kr" // WooCommerce
- // url := "https://droppii.net.vn/cnd-ginseng-gold"
- // Send an HTTP GET request to the URL
- response, err := http.Get(url)
- if err != nil {
- log.Fatal(err)
- }
- defer response.Body.Close()
- doc, err := goquery.NewDocumentFromReader(response.Body)
- if err != nil {
- log.Fatal(err)
- }
- // These will the value of PDP parsing structures
- cItemName := ".product-single__title"
- cShortDesc := ".product-single__description"
- cItemCategory := ".product-single__sub-info:nth-of-type(1)"
- cItemTags := ".product-single__sub-info:nth-of-type(2)"
- cItemImages := ".pg__thumb a"
- cItemTextDesc := ".product-single__description"
- // cItemOptions := "select#rating option"
- cItemPrice := ".product-price-wrap>.product-price>.product-price__price"
- cEmail := "footer"
- cAddress := ".elementor-widget-container > p"
- cRating := ".pg__review-stars .spr-icon-star"
- cAuthor := ".product-brand a"
- cVideo := "iframe"
- fmt.Println("ItemName: ", doc.Find(cItemName).First().Text())
- fmt.Println("ShortDesc: ", doc.Find(cShortDesc).First().Text())
- fmt.Println("ItemCategory: ", doc.Find(cItemCategory).First().Text())
- fmt.Println("ItemTags: ", doc.Find(cItemTags).First().Text())
- fmt.Println("TextDesc: ", doc.Find(cItemTextDesc).First().Text())
- fmt.Println("ItemPice: ", doc.Find(cItemPrice).First().Text())
- fmt.Println("Email: ", findEmail(doc.Find(cEmail).First().Text(), ""))
- fmt.Println("Address: ", doc.Find(cAddress).First().Text())
- fmt.Println("Rating : ", doc.Find(cRating).First().Text())
- // doc.Find("link ~ meta").Each(func(i int, s *goquery.Selection) {
- // v, _ := s.Attr("property")
- // fmt.Println(v, " : ", s.AttrOr("content", ""))
- // })
- fmt.Println("Author : ")
- doc.Find(cAuthor).Each(func(index int, element *goquery.Selection) {
- link, _ := element.Attr("href")
- fmt.Println("====================")
- fmt.Println("AuthorName: ", element.Text())
- fmt.Printf("AuthorLink: %s\n", link)
- })
- vdos := []string{}
- doc.Find(cVideo).Each(func(index int, element *goquery.Selection) {
- embed, _ := element.Attr("src")
- vdos = append(vdos, embed)
- })
- fmt.Printf("ItemVideos: %s\n", vdos)
- imgs := []string{}
- fmt.Println("ItemImages: ")
- doc.Find(cItemImages).Each(func(index int, element *goquery.Selection) {
- img, _ := element.Attr("href")
- imgs = append(imgs, fmt.Sprintf("https://%v", img))
- })
- fmt.Println("", imgs)
- }
// emailPattern matches e-mail-like tokens: a dotted local part, an "@", one
// or more dot-terminated domain labels and a 2-24 letter top-level domain.
// It is compiled once at package scope instead of on every findEmail call.
var emailPattern = regexp.MustCompile(`[a-zA-Z0-9_+&*-]+(?:\.[a-zA-Z0-9_+&*-]+)*@(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,24}`)

// findEmail returns every e-mail address found in body, in order of
// appearance. It returns nil when body contains no address.
//
// doms is currently unused; it is kept so existing callers keep compiling.
func findEmail(body string, doms string) (emails []string) {
	// FindAllString with n = -1 collects all non-overlapping matches, not
	// just the first one.
	return emailPattern.FindAllString(body, -1)
}
|