main.go 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. package main
  2. import (
  3. "fmt"
  4. "log"
  5. "net/http"
  6. "regexp"
  7. "github.com/PuerkitoBio/goquery"
  8. )
  9. func main() {
  10. // Wordpress
  11. url := "https://bt-beloria-1.myshopify.com/collections/women-collection/products/sweater-classical-tshirt"
  12. // url := "https://seoulknit.com/shop/v-neck-summer-pullover/"
  13. // url = "https://www.elcanto.co.kr" //MakeShop
  14. // url = "https://www.ippngirl.co.kr" //MakeShop
  15. // url = "https://ssline.kr" //cafe24
  16. // url = "https://koskomro.com" //cafe24
  17. // url = "https://bt-beloria-1.myshopify.com" //shopify
  18. // url = "https://lachinatakorea.com" //Godomall
  19. // url = "https://sf-fd.com" //Godomall
  20. // url = "https://www.vanillagift.com" //Magento
  21. // url = "https://taiwan.coach.com" //Magento
  22. // url = "http://mas1.magikthemes.com" //Magento
  23. // url = "https://aladinmarket.co.kr" //young Cart
  24. // url = "http://damoagift.com" //young Cart
  25. // url = "https://p2u.daboryhost.com" /DaboryShop
  26. // url = "https://seoulknit.com" //Woocommerce
  27. // url = "http://webhost.dabory.com/" /Woocommerce
  28. // url = "https://addand.kr" // WooCommerce
  29. // url := "https://droppii.net.vn/cnd-ginseng-gold"
  30. // Send an HTTP GET request to the URL
  31. response, err := http.Get(url)
  32. if err != nil {
  33. log.Fatal(err)
  34. }
  35. defer response.Body.Close()
  36. doc, err := goquery.NewDocumentFromReader(response.Body)
  37. if err != nil {
  38. log.Fatal(err)
  39. }
  40. // These will the value of PDP parsing structures
  41. cItemName := ".product-single__title"
  42. cShortDesc := ".product-single__description"
  43. cItemCategory := ".product-single__sub-info:nth-of-type(1)"
  44. cItemTags := ".product-single__sub-info:nth-of-type(2)"
  45. cItemImages := ".pg__thumb a"
  46. cItemTextDesc := ".product-single__description"
  47. // cItemOptions := "select#rating option"
  48. cItemPrice := ".product-price-wrap>.product-price>.product-price__price"
  49. cEmail := "footer"
  50. cAddress := ".elementor-widget-container > p"
  51. cRating := ".pg__review-stars .spr-icon-star"
  52. cAuthor := ".product-brand a"
  53. cVideo := "iframe"
  54. fmt.Println("ItemName: ", doc.Find(cItemName).First().Text())
  55. fmt.Println("ShortDesc: ", doc.Find(cShortDesc).First().Text())
  56. fmt.Println("ItemCategory: ", doc.Find(cItemCategory).First().Text())
  57. fmt.Println("ItemTags: ", doc.Find(cItemTags).First().Text())
  58. fmt.Println("TextDesc: ", doc.Find(cItemTextDesc).First().Text())
  59. fmt.Println("ItemPice: ", doc.Find(cItemPrice).First().Text())
  60. fmt.Println("Email: ", findEmail(doc.Find(cEmail).First().Text(), ""))
  61. fmt.Println("Address: ", doc.Find(cAddress).First().Text())
  62. fmt.Println("Rating : ", doc.Find(cRating).First().Text())
  63. // doc.Find("link ~ meta").Each(func(i int, s *goquery.Selection) {
  64. // v, _ := s.Attr("property")
  65. // fmt.Println(v, " : ", s.AttrOr("content", ""))
  66. // })
  67. fmt.Println("Author : ")
  68. doc.Find(cAuthor).Each(func(index int, element *goquery.Selection) {
  69. link, _ := element.Attr("href")
  70. fmt.Println("====================")
  71. fmt.Println("AuthorName: ", element.Text())
  72. fmt.Printf("AuthorLink: %s\n", link)
  73. })
  74. vdos := []string{}
  75. doc.Find(cVideo).Each(func(index int, element *goquery.Selection) {
  76. embed, _ := element.Attr("src")
  77. vdos = append(vdos, embed)
  78. })
  79. fmt.Printf("ItemVideos: %s\n", vdos)
  80. imgs := []string{}
  81. fmt.Println("ItemImages: ")
  82. doc.Find(cItemImages).Each(func(index int, element *goquery.Selection) {
  83. img, _ := element.Attr("href")
  84. imgs = append(imgs, fmt.Sprintf("https://%v", img))
  85. })
  86. fmt.Println("", imgs)
  87. }
  88. func findEmail(body string, doms string) (emails []string) {
  89. r, _ := regexp.Compile(`[a-zA-Z0-9_+&*-]+(?:\.[a-zA-Z0-9_+&*-]+)*@(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,24}`)
  90. emails = append(emails, r.FindStringSubmatch(body)...)
  91. return
  92. }