main.go 2.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. package main
  2. import (
  3. "fmt"
  4. "log"
  5. "net/http"
  6. "regexp"
  7. "github.com/PuerkitoBio/goquery"
  8. )
  9. func main() {
  10. // Wordpress
  11. url := "https://addand.kr/shop/new-%ed%95%9c-%ea%b6%8c%ec%9c%bc%eb%a1%9c-%eb%81%9d%eb%82%98%eb%8a%94-%eb%85%b8%ec%85%98/" // Replace with the URL of your choice
  12. // Send an HTTP GET request to the URL
  13. response, err := http.Get(url)
  14. if err != nil {
  15. log.Fatal(err)
  16. }
  17. defer response.Body.Close()
  18. doc, err := goquery.NewDocumentFromReader(response.Body)
  19. if err != nil {
  20. log.Fatal(err)
  21. }
  22. // These will the value of PDP parsing structures
  23. cItemName := ".product_title"
  24. cShortDesc := ".woocommerce-product-details__short-description"
  25. cItemCategory := ".posted_in"
  26. cItemTags := ".tagged_as"
  27. cItemImages := ".woocommerce-product-gallery__image img"
  28. cItemTextDesc := "div.woocommerce-tabs"
  29. // cItemOptions := "select#rating option"
  30. cItemPrice := ".summary>.price .woocommerce-Price-amount.amount"
  31. cEmail := "footer"
  32. cRating := ".star-rating .rating"
  33. cAuthor := ".product-brand a"
  34. cVideo := "iframe"
  35. // Use the Find method to select elements that match the css selector
  36. // doc.Find(cItemName).Each(func(index int, element *goquery.Selection) {
  37. // // Extract the text associated with the selected element
  38. // text := element.Text()
  39. // fmt.Printf("Text associated with %s: %s\n", cItemName, text)
  40. // })
  41. fmt.Println("ItemName: ", doc.Find(cItemName).First().Text())
  42. fmt.Println("ShortDesc: ", doc.Find(cShortDesc).First().Text())
  43. fmt.Println("ItemCategory: ", doc.Find(cItemCategory).First().Text())
  44. fmt.Println("ItemTags: ", doc.Find(cItemTags).First().Text())
  45. fmt.Println("TextDesc: ", doc.Find(cItemTextDesc).First().Text())
  46. fmt.Println("ItemPice: ", doc.Find(cItemPrice).First().Text())
  47. fmt.Println("Email: ", findEmail(doc.Find(cEmail).First().Text(), ""))
  48. fmt.Println("Rating : ", doc.Find(cRating).First().Text())
  49. fmt.Println("Author : ")
  50. doc.Find(cAuthor).Each(func(index int, element *goquery.Selection) {
  51. link, _ := element.Attr("href")
  52. fmt.Println("====================")
  53. fmt.Println("AuthorName: ", element.Text())
  54. fmt.Printf("AuthorLink: %s\n", link)
  55. })
  56. vdos := []string{}
  57. doc.Find(cVideo).Each(func(index int, element *goquery.Selection) {
  58. embed, _ := element.Attr("src")
  59. vdos = append(vdos, embed)
  60. })
  61. fmt.Printf("ItemImages: %s\n", vdos)
  62. imgs := []string{}
  63. doc.Find(cItemImages).Each(func(index int, element *goquery.Selection) {
  64. img, _ := element.Attr("src")
  65. imgs = append(imgs, img)
  66. })
  67. fmt.Printf("ItemImages: %s\n", imgs)
  68. }
  69. func findEmail(body string, doms string) (emails []string) {
  70. r, _ := regexp.Compile(`[a-zA-Z0-9_+&*-]+(?:\.[a-zA-Z0-9_+&*-]+)*@(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,24}`)
  71. emails = append(emails, r.FindStringSubmatch(body)...)
  72. return
  73. }