main.go 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. package main
  2. import (
  3. "fmt"
  4. "log"
  5. "net/http"
  6. "regexp"
  7. "strings"
  8. "github.com/PuerkitoBio/goquery"
  9. )
  10. func main() {
  11. // Wordpress
  12. // url := "https://addand.kr/shop/new-%ed%95%9c-%ea%b6%8c%ec%9c%bc%eb%a1%9c-%eb%81%9d%eb%82%98%eb%8a%94-%eb%85%b8%ec%85%98/"
  13. url := "https://www.youngla.com/products/yla-ftshorts-black"
  14. // Send an HTTP GET request to the URL
  15. response, err := http.Get(url)
  16. if err != nil {
  17. log.Fatal(err)
  18. }
  19. defer response.Body.Close()
  20. doc, err := goquery.NewDocumentFromReader(response.Body)
  21. if err != nil {
  22. log.Fatal(err)
  23. }
  24. // These will the value of PDP parsing structures
  25. cItemName := ".ProductMeta__Title.Heading.u-h2"
  26. cShortDesc := ".shopify-installments span#shopify-installments-content"
  27. cItemCategory := ".single-breadcrumbs-wrapper"
  28. cItemTags := ".tagged_as"
  29. cItemImages := ".AspectRatio.AspectRatio--withFallback img"
  30. cItemTextDesc := ".ProductMeta__Description"
  31. // // cItemOptions := "select#rating option"
  32. cItemPrice := ".ProductMeta>.ProductMeta__PriceList.Heading>.ProductMeta__Price.Price.Text--subdued.u-h4"
  33. cEmail := "footer"
  34. cInfo := ".Footer__Block.Footer__Block--text>.Footer__Content.Rte > p"
  35. cRating := ".star-rating .rating"
  36. cAuthor := ".product-brand a"
  37. cVideo := "iframe"
  38. fmt.Println("ItemName: ", doc.Find(cItemName).First().Text())
  39. fmt.Println("ShortDesc: ", doc.Find(cShortDesc).First().Text())
  40. fmt.Println("ItemCategory: ", doc.Find(cItemCategory).First().Text())
  41. fmt.Println("ItemTags: ", doc.Find(cItemTags).First().Text())
  42. fmt.Println("TextDesc: ", doc.Find(cItemTextDesc).First().Text())
  43. fmt.Println("ItemPice: ", doc.Find(cItemPrice).First().Text())
  44. fmt.Println("Email: ", findEmail(doc.Find(cEmail).First().Text(), ""))
  45. fmt.Println("Info: ", doc.Find(cInfo).First().Text())
  46. fmt.Println("Rating : ", doc.Find(cRating).First().Text())
  47. // doc.Find("link ~ meta").Each(func(i int, s *goquery.Selection) {
  48. // v, _ := s.Attr("property")
  49. // fmt.Println(v, " : ", s.AttrOr("content", ""))
  50. // })
  51. fmt.Println("Author : ")
  52. doc.Find(cAuthor).Each(func(index int, element *goquery.Selection) {
  53. link, _ := element.Attr("href")
  54. fmt.Println("====================")
  55. fmt.Println("AuthorName: ", element.Text())
  56. fmt.Printf("AuthorLink: %s\n", link)
  57. })
  58. vdos := []string{}
  59. doc.Find(cVideo).Each(func(index int, element *goquery.Selection) {
  60. embed, _ := element.Attr("src")
  61. vdos = append(vdos, embed)
  62. })
  63. fmt.Printf("ItemVideos: %s\n", vdos)
  64. imgs := []string{}
  65. fmt.Println("ItemImages: ")
  66. doc.Find(cItemImages).Each(func(index int, element *goquery.Selection) {
  67. img, _ := element.Attr("data-original-src")
  68. res := strings.ReplaceAll(img, "//", "https://")
  69. imgs = append(imgs, res)
  70. })
  71. fmt.Println("", imgs)
  72. }
  73. func findEmail(body string, doms string) (emails []string) {
  74. r, _ := regexp.Compile(`[a-zA-Z0-9_+&*-]+(?:\.[a-zA-Z0-9_+&*-]+)*@(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,24}`)
  75. emails = append(emails, r.FindStringSubmatch(body)...)
  76. return
  77. }