1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465 |
- package common
- import (
- "kkscrap-go/model"
- "regexp"
- "strings"
- util "kkscrap-go/controllers/scraper/util"
- "github.com/PuerkitoBio/goquery"
- )
- func GetLanguage(body string) string {
- doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
- util.CheckError(err)
- ret, _ := doc.Find("html").Attr("lang")
- return ret
- }
// emailPattern matches e-mail-like tokens: a dotted local part, "@", one or
// more dotted domain labels and an alphabetic TLD of 2 to 24 letters. It is
// compiled once at package scope instead of on every GetEmails call.
var emailPattern = regexp.MustCompile(`[a-zA-Z0-9_+&*-]+(?:\.[a-zA-Z0-9_+&*-]+)*@(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,24}`)

// GetEmails scans body for e-mail addresses and returns each distinct
// address once, in the order of its first occurrence in body.
//
// The following matches are dropped:
//   - the addresses "support@crema.me" and "support@snapvi.co.kr";
//   - tokens ending in ".png", ".jpg" or ".js", which are asset file names
//     such as "icon@2x.png" that merely look like addresses.
//
// Matching and de-duplication are case-sensitive, so "A@B.COM" and "a@b.com"
// are reported as two separate entries. The result is nil when nothing
// qualifies.
func GetEmails(body string) (ret []string) {
	seen := make(map[string]struct{})
	for _, addr := range emailPattern.FindAllString(body, -1) {
		switch {
		case addr == "support@crema.me", addr == "support@snapvi.co.kr":
			continue
		case strings.HasSuffix(addr, ".png"),
			strings.HasSuffix(addr, ".jpg"),
			strings.HasSuffix(addr, ".js"):
			continue
		}
		// No prefix filtering for "/", "http://" or "https://" is needed:
		// emailPattern cannot produce a match that starts with "/" or that
		// contains ":", so such checks could never fire.
		if _, dup := seen[addr]; dup {
			continue
		}
		seen[addr] = struct{}{}
		// Append while scanning rather than ranging over the set, so the
		// output order is stable instead of following map iteration order.
		ret = append(ret, addr)
	}
	return ret
}
- func Parse(uri string, item *model.ItemInfo) {
- body, err := util.Get(uri)
- util.CheckError(err)
- item.Language = GetLanguage(body)
- doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
- util.CheckError(err)
- doc.Find("meta").Each(func(i int, s *goquery.Selection) {
- v, _ := s.Attr("property")
- if v == "og:description" {
- item.ShortDesc = s.AttrOr("content", "")
- } else if v == "og:title" {
- item.ItemName = s.AttrOr("content", "")
- } else if v == "og:image" {
- item.Images = append(item.Images, s.AttrOr("content", ""))
- } else if v == "product:price:amount" {
- item.SalesPrice = util.GetFloat32(s.AttrOr("content", ""))
- } else if v == "product:price:currency" {
- item.Currency = s.AttrOr("content", "")
- }
- })
- return
- }
|