main.go 2.7 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. package main
  2. import (
  3. "fmt"
  4. "log"
  5. "net/http"
  6. "regexp"
  7. "github.com/PuerkitoBio/goquery"
  8. )
  9. func main() {
  10. // Wordpress
  11. // url := "https://addand.kr/shop/new-%ed%95%9c-%ea%b6%8c%ec%9c%bc%eb%a1%9c-%eb%81%9d%eb%82%98%eb%8a%94-%eb%85%b8%ec%85%98/"
  12. url := "https://droppii.net.vn/cnd-ginseng-gold"
  13. // Send an HTTP GET request to the URL
  14. response, err := http.Get(url)
  15. if err != nil {
  16. log.Fatal(err)
  17. }
  18. defer response.Body.Close()
  19. doc, err := goquery.NewDocumentFromReader(response.Body)
  20. if err != nil {
  21. log.Fatal(err)
  22. }
  23. // These will the value of PDP parsing structures
  24. cItemName := ".product_title"
  25. cShortDesc := ".woocommerce-product-details__short-description"
  26. cItemCategory := ".single-breadcrumbs-wrapper"
  27. cItemTags := ".tagged_as"
  28. cItemImages := ".woocommerce-product-gallery__image a"
  29. cItemTextDesc := "div.product-tabs-wrapper"
  30. // cItemOptions := "select#rating option"
  31. cItemPrice := ".summary>.summary-inner>.price .woocommerce-Price-amount.amount"
  32. cEmail := "footer"
  33. cAddress := ".elementor-widget-container > p"
  34. cRating := ".star-rating .rating"
  35. cAuthor := ".product-brand a"
  36. cVideo := "iframe"
  37. fmt.Println("ItemName: ", doc.Find(cItemName).First().Text())
  38. fmt.Println("ShortDesc: ", doc.Find(cShortDesc).First().Text())
  39. fmt.Println("ItemCategory: ", doc.Find(cItemCategory).First().Text())
  40. fmt.Println("ItemTags: ", doc.Find(cItemTags).First().Text())
  41. fmt.Println("TextDesc: ", doc.Find(cItemTextDesc).First().Text())
  42. fmt.Println("ItemPice: ", doc.Find(cItemPrice).First().Text())
  43. fmt.Println("Email: ", findEmail(doc.Find(cEmail).First().Text(), ""))
  44. fmt.Println("Address: ", doc.Find(cAddress).First().Text())
  45. fmt.Println("Rating : ", doc.Find(cRating).First().Text())
  46. // doc.Find("link ~ meta").Each(func(i int, s *goquery.Selection) {
  47. // v, _ := s.Attr("property")
  48. // fmt.Println(v, " : ", s.AttrOr("content", ""))
  49. // })
  50. fmt.Println("Author : ")
  51. doc.Find(cAuthor).Each(func(index int, element *goquery.Selection) {
  52. link, _ := element.Attr("href")
  53. fmt.Println("====================")
  54. fmt.Println("AuthorName: ", element.Text())
  55. fmt.Printf("AuthorLink: %s\n", link)
  56. })
  57. vdos := []string{}
  58. doc.Find(cVideo).Each(func(index int, element *goquery.Selection) {
  59. embed, _ := element.Attr("src")
  60. vdos = append(vdos, embed)
  61. })
  62. fmt.Printf("ItemVideos: %s\n", vdos)
  63. imgs := []string{}
  64. fmt.Println("ItemImages: ")
  65. doc.Find(cItemImages).Each(func(index int, element *goquery.Selection) {
  66. img, _ := element.Attr("href")
  67. imgs = append(imgs, img)
  68. })
  69. fmt.Println("", imgs)
  70. }
  71. func findEmail(body string, doms string) (emails []string) {
  72. r, _ := regexp.Compile(`[a-zA-Z0-9_+&*-]+(?:\.[a-zA-Z0-9_+&*-]+)*@(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,24}`)
  73. emails = append(emails, r.FindStringSubmatch(body)...)
  74. return
  75. }