Przeglądaj źródła

우커머스 상품상세 추출 class 변경

hakjinlee 1 rok temu
rodzic
commit
5b8b015033

+ 8 - 0
.idea/.gitignore

@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml

+ 9 - 0
.idea/erp-kkscrap.iml

@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="WEB_MODULE" version="4">
+  <component name="Go" enabled="true" />
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

+ 8 - 0
.idea/modules.xml

@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/erp-kkscrap.iml" filepath="$PROJECT_DIR$/.idea/erp-kkscrap.iml" />
+    </modules>
+  </component>
+</project>

+ 6 - 0
.idea/vcs.xml

@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>

+ 10 - 8
controllers/scraper/wordpress/parser.go

@@ -3,13 +3,12 @@ package wordpress
 import (
 	"encoding/json"
 	"fmt"
+	"github.com/PuerkitoBio/goquery"
 	"kkscrap-go/controllers/scraper/common"
+	"kkscrap-go/controllers/scraper/util"
 	"kkscrap-go/model"
+	"regexp"
 	"strings"
-
-	util "kkscrap-go/controllers/scraper/util"
-
-	"github.com/PuerkitoBio/goquery"
 )
 
 func Parse(uri string, item *model.ItemInfo) {
@@ -21,7 +20,7 @@ func Parse(uri string, item *model.ItemInfo) {
 	item.SalesPrice = util.GetFloat32(p.Offers[0].Price)
 	item.Sku = fmt.Sprintf("%v", p.Sku)
 	item.ShortDesc = p.Description
-	item.OriginDesc = getDesc(body)
+	item.OriginDesc = getProductDescription(body)
 	item.TextDesc = getTextDesc(body)
 	item.ItemName = p.Name
 	item.Currency = p.Offers[0].Pricecurrency
@@ -31,10 +30,10 @@ func Parse(uri string, item *model.ItemInfo) {
 	return
 }
 
-func getDesc(body string) string {
+func getProductDescription(body string) string {
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
 	util.CheckError(err)
-	s := doc.Find("div.woocommerce-tabs")
+	s := doc.Find("div.woocommerce-Tabs-panel--description")
 	//s.Each(func(i int, selection *goquery.Selection) {
 	//	log.Println(selection.Html())
 	//})
@@ -96,9 +95,12 @@ func getImages(body string) (ret []string) {
 	//	return ret[i].Width*ret[i].Height > ret[j].Width*ret[j].Height
 	//})
 	//ret = ret[:1]
+
+	re := regexp.MustCompile(`-\d+x\d+\.`)
 	doc.Find("figure.woocommerce-product-gallery__wrapper div").Each(func(i int, s *goquery.Selection) {
 		if src, ok := s.Attr("data-thumb"); ok {
-			ret = append(ret, src)
+			newImgUrl := re.ReplaceAllString(src, ".")
+			ret = append(ret, newImgUrl)
 		}
 	})
 

+ 20 - 11
controllers/scraper/wordpress/parser_test.go

@@ -1,39 +1,39 @@
 package wordpress
 
 import (
-	util "kkscrap-go/controllers/scraper/util"
-	"testing"
-
+	"commerce-crwaler/util"
 	"github.com/stretchr/testify/assert"
+	"os"
+	"testing"
 )
 
 func TestGetImages(t *testing.T) {
 	body, err := util.Get("https://seoulknit.com/shop/collar-half-sleeved-top_blue/")
 	assert.Equal(t, nil, err)
 	imgs := getImages(body)
-	assert.Equal(t, "https://seoulknit.com/wp-content/uploads/2021/05/abito_0594-scaled-454x681.jpg", imgs[0].Path)
-	assert.Equal(t, "https://seoulknit.com/wp-content/uploads/2021/05/abito_0594-scaled-100x100.jpg", imgs[1].Path)
+	assert.Equal(t, "https://seoulknit.com/wp-content/uploads/2021/05/abito_0594-scaled-454x681.jpg", imgs[0])
+	assert.Equal(t, "https://seoulknit.com/wp-content/uploads/2021/05/abito_0594-scaled-100x100.jpg", imgs[1])
 
 	body, err = util.Get("https://www.dollshecraft.com/product/amanda-beauty-26f-classic-maxi-65cm-special-package-2/")
 	assert.Equal(t, nil, err)
 	imgs = getImages(body)
-	assert.Equal(t, "https://cdn.dollshecraft.com/wp-content/uploads/2018/08/Amanda-Beauty_558743_01.jpg", imgs[0].Path)
+	assert.Equal(t, "https://cdn.dollshecraft.com/wp-content/uploads/2018/08/Amanda-Beauty_558743_01.jpg", imgs[0])
 
 	body, err = util.Get("https://www.shoprootscience.com/shop/arctic-c-vitamin-c-serum")
 	assert.Equal(t, nil, err)
 	imgs = getImages(body)
-	assert.Equal(t, "https://www.shoprootscience.com/wp-content/uploads/2020/10/Arctic-C-product-511x634.jpg", imgs[0].Path)
-	assert.Equal(t, "https://www.shoprootscience.com/wp-content/uploads/2020/10/Arctic-C-product.jpg", imgs[1].Path)
+	assert.Equal(t, "https://www.shoprootscience.com/wp-content/uploads/2020/10/Arctic-C-product-511x634.jpg", imgs[0])
+	assert.Equal(t, "https://www.shoprootscience.com/wp-content/uploads/2020/10/Arctic-C-product.jpg", imgs[1])
 
 	body, err = util.Get("https://strandbergguitars.com/product/boden-6-dr-titanium/")
 	assert.Equal(t, nil, err)
 	imgs = getImages(body)
-	assert.Equal(t, "https://strandbergguitars.com/cdn-cgi/image/onerror=redirect,quality=85,format=auto,metadata=copyright,width=600,height=901,fit=cover/wp-content/uploads/2021/04/bd6ct-20dr-k-p-ti_6.png", imgs[0].Path)
+	assert.Equal(t, "https://strandbergguitars.com/cdn-cgi/image/onerror=redirect,quality=85,format=auto,metadata=copyright,width=600,height=901,fit=cover/wp-content/uploads/2021/04/bd6ct-20dr-k-p-ti_6.png", imgs[0])
 
 	body, err = util.Get("https://lanbelle.com/product/%EB%9E%91%EB%B2%A8-%EC%88%98%ED%8D%BC%EB%82%B4%EC%B8%84%EB%9F%B4-%ED%81%AC%EB%A6%BC/")
 	assert.Equal(t, nil, err)
 	imgs = getImages(body)
-	assert.Equal(t, "https://strandbergguitars.com/cdn-cgi/image/onerror=redirect,quality=85,format=auto,metadata=copyright,width=600,height=901,fit=cover/wp-content/uploads/2021/04/bd6ct-20dr-k-p-ti_6.png", imgs[0].Path)
+	assert.Equal(t, "https://strandbergguitars.com/cdn-cgi/image/onerror=redirect,quality=85,format=auto,metadata=copyright,width=600,height=901,fit=cover/wp-content/uploads/2021/04/bd6ct-20dr-k-p-ti_6.png", imgs[0])
 
 }
 
@@ -73,7 +73,16 @@ func TestGetWordPressProduct(t *testing.T) {
 	body, err = util.Get("https://strandbergguitars.com/product/boden-6-dr-titanium/")
 	assert.Equal(t, nil, err)
 	ret = getProduct(body)
-	assert.Equal(t, ret.Sku, "BD6CT-20DR-K-P-TI")
+	assert.Equal(t, "BD6CT-20DRB-K-P-TI", ret.Sku)
+
+	t.Run("상품설명", func(t *testing.T) {
+		body, err := util.Get("https://www.mehisox.com/shop/sn388/")
+		assert.Equal(t, nil, err)
+		html := getProductDescription(body)
+		os.WriteFile("c:\\temp\\test.html", []byte(html), 0644)
+		assert.Equal(t, ``, html)
+	})
+
 }
 
 func TestGetCategories(t *testing.T) {