Selaa lähdekoodia

solution-type-get, product-page-get 구현 및 테마 추가

hakjinlee 1 vuosi sitten
vanhempi
commit
77cc8311a8

+ 1 - 4
controllers/scraper/cafe24/parser.go

@@ -9,10 +9,7 @@ import (
 	"github.com/PuerkitoBio/goquery"
 )
 
-func Parse(uri string, item *model.ItemInfo) {
-	body, err := util.Get(uri)
-	util.CheckError(err)
-
+func Parse(body string, item *model.ItemInfo) {
 	item.Language = common.GetLanguage(body)
 
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))

+ 1 - 4
controllers/scraper/godo/parser.go

@@ -11,10 +11,7 @@ import (
 	"github.com/PuerkitoBio/goquery"
 )
 
-func Parse(uri string, item *model.ItemInfo) {
-	body, err := util.Get(uri)
-	util.CheckError(err)
-
+func Parse(body string, item *model.ItemInfo) {
 	item.Language = getLanguage(body)
 
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))

+ 1 - 4
controllers/scraper/magento/parser.go

@@ -10,10 +10,7 @@ import (
 	"github.com/PuerkitoBio/goquery"
 )
 
-func Parse(uri string, item *model.ItemInfo) {
-	body, err := util.Get(uri)
-	util.CheckError(err)
-
+func Parse(body string, item *model.ItemInfo) {
 	item.Language = common.GetLanguage(body)
 
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))

+ 1 - 4
controllers/scraper/shopify/parser.go

@@ -10,10 +10,7 @@ import (
 	"github.com/PuerkitoBio/goquery"
 )
 
-func Parse(uri string, item *model.ItemInfo) {
-	body, err := util.Get(uri)
-	util.CheckError(err)
-
+func Parse(body string, item *model.ItemInfo) {
 	item.Language = common.GetLanguage(body)
 
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))

+ 99 - 39
controllers/scraper/tct-main.go

@@ -7,18 +7,16 @@ import (
 	"kkscrap-go/controllers/scraper/godo"
 	"kkscrap-go/controllers/scraper/magento"
 	"kkscrap-go/controllers/scraper/shopify"
-	"kkscrap-go/locals"
-
+	"kkscrap-go/controllers/scraper/util"
 	"kkscrap-go/controllers/scraper/wordpress"
 	"kkscrap-go/controllers/scraper/young"
+	"kkscrap-go/locals"
 	"kkscrap-go/model"
 	"net/http"
 	"net/url"
 	"regexp"
 	"strings"
 
-	util "kkscrap-go/controllers/scraper/util"
-
 	// "golang.org/x/crypto/bcrypt"
 
 	"github.com/labstack/echo"
@@ -37,18 +35,23 @@ func SolutionTypeGet(c echo.Context) error {
 		ThemeType    string
 	}{}
 
+	body, err := util.Get(v.Url)
+	if err != nil {
+		return err
+	}
+
 	fmt.Println(v.Url)
-	// (1) function 공유될 수 있도록 해서 작업요
-	// v.SolutionType, v.ThemeType = solType(&kkk)
-	retv.SolutionType = "Wordpress"
-	retv.ThemeType = "Avada"
-	// ret, _ := json.MarshalIndent(itemInfo, "", "\t")
-	ret, _ := json.Marshal(retv)
-	return c.JSONBlob(http.StatusOK, ret)
+	st, theme, err := getSolutionType(body)
+	if err != nil {
+		return c.JSONBlob(http.StatusOK, []byte(err.Error()))
+	}
+	retv.SolutionType = string(st)
+	retv.ThemeType = theme
+	return c.JSON(http.StatusOK, retv)
 }
 
 type ProductPageGetReq struct {
-	SolutionType string
+	SolutionType model.SolutionType
 	ThemeType    string
 	Products     []ProductUri
 }
@@ -65,22 +68,28 @@ func ProductPageGet(c echo.Context) error {
 	v := c.Get("receiver").(ProductPageGetReq)
 
 	var vRet locals.ProductPage // Row(개별레코드)->Page(Row의 집합)->Book(Page의 집합)의 개념
-	for _, row := range v.Products {
+	for _, p := range v.Products {
 
-		// (1)Url 의 HTML를 2번 가져오는데 아래와 같이 1번만 가져와서 처리할 수 있도록 수정요.
-		// kkk := htmlGet(row.Uri)
+		body, err := util.Get(p.Uri)
+		if err != nil {
+			return err
+		}
 
 		// 전체 웹사이트가 아니라 개별 상품페이지(1개페이지)의 경우 SolutionType 없이 request됨
 		if v.SolutionType == "" {
-			// v.SolutionType, v.ThemeType = solType(&kkk)
+			var err error
+			v.SolutionType, v.ThemeType, err = getSolutionType(body)
+			if err != nil {
+				return c.String(http.StatusBadRequest, err.Error())
+			}
 		}
 
-		prodInfo, err := parse(row.Uri)
+		prodInfo, err := parse(p.Uri, body)
 		if err != nil {
 			prodInfo.ItemNick = "Parsing Failed"
 		}
 		// 개별 prodInfo가 계속 추가될 수 있도록 코드를 변경요.
-		// vRet.ProductPage = append(vRet.ProductPage, *prodInfo)
+		vRet.ProductPage = append(vRet.ProductPage, toProductPage(prodInfo))
 	}
 
 	// ret, _ := json.MarshalIndent(itemInfo, "", "\t")
@@ -88,6 +97,51 @@ func ProductPageGet(c echo.Context) error {
 	return c.JSONBlob(http.StatusOK, ret)
 }
 
+// toProductPage copies a model.ItemInfo field by field into a locals.Product.
+// SolutionName is converted to locals.SolutionType on the way; every other
+// scalar and slice field is assigned as-is. Options are rebuilt element by
+// element as locals.Option / locals.Choice values.
+func toProductPage(info model.ItemInfo) locals.Product {
+	product := locals.Product{
+		SolutionName:  locals.SolutionType(info.SolutionName),
+		Version:       info.Version,
+		Emails:        info.Emails,
+		DomainName:    info.DomainName,
+		DomainURI:     info.DomainURI,
+		ItemName:      info.ItemName,
+		ItemNick:      info.ItemNick,
+		ModelName:     info.ModelName,
+		ModelNo:       info.ModelNo,
+		BrandName:     info.BrandName,
+		Sku:           info.Sku,
+		ItemCategory:  info.ItemCategory,
+		Manufacturer:  info.Manufacturer,
+		Origin:        info.Origin,
+		Language:      info.Language,
+		Currency:      info.Currency,
+		SalesPrice:    info.SalesPrice,
+		DeliveryPrice: info.DeliveryPrice,
+		MinimumQty:    info.MinimumQty,
+		UserCredit:    info.UserCredit,
+		Images:        info.Images,
+		ShortDesc:     info.ShortDesc,
+		OriginDesc:    info.OriginDesc,
+		TextDesc:      info.TextDesc,
+	}
+
+	// Options is left nil when info has no options. Each copied option gets
+	// a non-nil Choices slice, even when the source option has no choices.
+	for _, src := range info.Options {
+		choices := make([]locals.Choice, 0)
+		for _, c := range src.Choices {
+			choices = append(choices, locals.Choice{Name: c.Name, Price: c.Price})
+		}
+		product.Options = append(product.Options, locals.Option{Name: src.Name, Choices: choices})
+	}
+
+	return product
+}
+
 type ItemUrlScrapReq struct {
 	ItemUrl string
 }
@@ -96,11 +150,12 @@ func ItemUrlScrap(c echo.Context) error {
 
 	v := c.Get("receiver").(ItemUrlScrapReq)
 
-	// retv := &struct {
-	// 	model.ItemInfo
-	// }{}
+	body, err := util.Get(v.ItemUrl)
+	if err != nil {
+		return err
+	}
 
-	itemInfo, err := parse(v.ItemUrl)
+	itemInfo, err := parse(v.ItemUrl, body)
 	if err != nil {
 		return c.String(604, "ertvwerawqfd-ItemUrl Parse failed: "+err.Error())
 	}
@@ -125,16 +180,16 @@ func getTitle(body string) string {
 	return ""
 }
 
-func parse(uri string) (ret model.ItemInfo, err error) {
-	t, err := getSolutionType(uri)
+func parse(uri, body string) (ret model.ItemInfo, err error) {
+	t, theme, err := getSolutionType(body)
 	if err != nil {
 		return
 	}
-	ret = parseSolution(t, uri)
+	ret = parseSolution(t, theme, uri, body)
 	return
 }
 
-func parseSolution(t model.SolutionType, uri string) (ret model.ItemInfo) {
+func parseSolution(t model.SolutionType, theme, uri, body string) (ret model.ItemInfo) {
 	ret.SolutionName = t
 	u, err := url.Parse(uri)
 	if err != nil {
@@ -144,29 +199,24 @@ func parseSolution(t model.SolutionType, uri string) (ret model.ItemInfo) {
 	ret.DomainURI = uri
 
 	if t == model.SolutionTypeWooCommerce {
-		wordpress.Parse(uri, &ret)
+		wordpress.Parse(body, &ret)
 	} else if t == model.SolutionTypeShopify {
-		shopify.Parse(uri, &ret)
+		shopify.Parse(body, &ret)
 	} else if t == model.SolutionTypeMagento {
-		magento.Parse(uri, &ret)
+		magento.Parse(body, &ret)
 	} else if t == model.SolutionTypeCafe24 {
-		cafe24.Parse(uri, &ret)
+		cafe24.Parse(body, &ret)
 	} else if t == model.SolutionTypeGodo {
-		godo.Parse(uri, &ret)
+		godo.Parse(body, &ret)
 	} else if t == model.SolutionTypeYoung {
-		young.Parse(uri, &ret)
+		young.Parse(body, &ret)
 	} else if t == model.SolutionTypeOthers {
-		magento.Parse(uri, &ret)
+		magento.Parse(body, &ret)
 	}
 	return
 }
 
-func getSolutionType(url string) (t model.SolutionType, reterr error) {
-	body, err := util.Get(url)
-	if err != nil {
-		reterr = err
-		return
-	}
+func getSolutionType(body string) (t model.SolutionType, theme string, reterr error) {
 	if strings.Contains(body, "window.CAFE24") {
 		t = model.SolutionTypeCafe24
 	} else if strings.Contains(body, "woocommerce-page") {
@@ -184,6 +234,16 @@ func getSolutionType(url string) (t model.SolutionType, reterr error) {
 		//reterr = errors.New("no found solution type")
 	}
 
+	theme = ""
+	re := regexp.MustCompile(`wp-content\/themes\/(.+?)\/`)
+	result := re.FindStringSubmatch(body)
+
+	if len(result) > 1 {
+		theme = result[1]
+	} else {
+		theme = "generic"
+	}
+
 	//ioutil.WriteFile(string(t) + ".html", []byte(body), 644)
 	return
 }

+ 46 - 0
controllers/scraper/tct_test.go

@@ -0,0 +1,46 @@
+package controllers_scraper
+
+import (
+	"kkscrap-go/model"
+	"testing"
+)
+
+// TestToProduct checks that toProductPage carries the SolutionName and
+// TextDesc of a model.ItemInfo over to the resulting product unchanged.
+func TestToProduct(t *testing.T) {
+	ori := model.ItemInfo{
+		SolutionName:  "hello",
+		Version:       "",
+		Emails:        nil,
+		DomainName:    "",
+		DomainURI:     "",
+		ItemName:      "",
+		ItemNick:      "",
+		ModelName:     "",
+		ModelNo:       "",
+		BrandName:     "",
+		Sku:           "",
+		ItemCategory:  "",
+		Manufacturer:  "",
+		Origin:        "",
+		Language:      "",
+		Currency:      "",
+		SalesPrice:    0,
+		DeliveryPrice: 0,
+		MinimumQty:    0,
+		UserCredit:    0,
+		Options:       nil,
+		Images:        nil,
+		ShortDesc:     "",
+		OriginDesc:    "",
+		TextDesc:      "1234",
+	}
+
+	pro := toProductPage(ori)
+	if pro.TextDesc != "1234" {
+		t.Errorf("TextDesc = %q, want %q", pro.TextDesc, "1234")
+	}
+	if pro.SolutionName != "hello" {
+		t.Errorf("SolutionName = %q, want %q", pro.SolutionName, "hello")
+	}
+}

+ 1 - 3
controllers/scraper/wordpress/parser.go

@@ -11,9 +11,7 @@ import (
 	"strings"
 )
 
-func Parse(uri string, item *model.ItemInfo) {
-	body, err := util.Get(uri)
-	util.CheckError(err)
+func Parse(body string, item *model.ItemInfo) {
 	p := getProduct(body)
 	if p.Image != "" {
 		item.Images = append(item.Images, p.Image)

+ 1 - 4
controllers/scraper/young/parser.go

@@ -11,10 +11,7 @@ import (
 	"github.com/PuerkitoBio/goquery"
 )
 
-func Parse(uri string, item *model.ItemInfo) {
-	body, err := util.Get(uri)
-	util.CheckError(err)
-
+func Parse(body string, item *model.ItemInfo) {
 	item.Language = common.GetLanguage(body)
 
 	doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))

+ 0 - 31
locals/common-func-dev.go

@@ -105,37 +105,6 @@ func (y *SyncController) Init() error {
 
 }
 
-func (t *PageVars) ChkPageVars(chk string) {
-
-	if t.Query == "" {
-		e.ChkLog(chk, "Query is empty")
-	}
-
-	if t.Fields == "" {
-		e.ChkLog(chk, "Fields is empty")
-	}
-
-	if t.Asc == "" {
-		e.ChkLog(chk, "Asc is empty")
-	}
-
-	if t.Desc == "" {
-		e.ChkLog(chk, "Desc is empty")
-	}
-
-	if t.Limit == 0 {
-		e.ChkLog(chk, "Limit is zero")
-	}
-
-	if t.Offset == 0 {
-		e.ChkLog(chk, "Offset is zero")
-	}
-
-	e.ChkLog(chk+", PageVars value is", *t)
-
-	return
-}
-
 type (
 	MemoryMap map[string]interface{}
 

+ 19 - 0
test/api.http

@@ -10,4 +10,23 @@ Content-Type: application/json
 
 {
   "itemUrl": "https://www.iroirotokyo.com/product/%eb%ac%b4%eb%a3%8c%eb%b0%b0%ec%86%a1-%ec%98%a4%eb%a6%ac%ed%9e%88%eb%a1%9c-%ea%b3%a4%ec%95%bd%ec%a0%a4%eb%a6%ac-%ed%8c%8c%ec%9a%b0%ec%b9%98-%ec%b9%bc%eb%a1%9c%eb%a6%ac%ec%a0%9c%eb%a1%9c%eb%b0%b1/"
+}
+###
+POST http://localhost:19080/solution-type-get
+Content-Type: application/json
+
+{
+  "url": "https://www.iroirotokyo.com/product/%eb%ac%b4%eb%a3%8c%eb%b0%b0%ec%86%a1-%ec%98%a4%eb%a6%ac%ed%9e%88%eb%a1%9c-%ea%b3%a4%ec%95%bd%ec%a0%a4%eb%a6%ac-%ed%8c%8c%ec%9a%b0%ec%b9%98-%ec%b9%bc%eb%a1%9c%eb%a6%ac%ec%a0%9c%eb%a1%9c%eb%b0%b1/"
+}
+###
+POST http://localhost:19080/product-page-get
+Content-Type: application/json
+
+{
+  "solutionType": "wordpress",
+  "themeType": "",
+  "products": [
+    {"uri": "https://www.mangokuro.com/shop/turkey/"},
+    {"uri": "https://www.iroirotokyo.com/product/%eb%ac%b4%eb%a3%8c%eb%b0%b0%ec%86%a1-%ec%98%a4%eb%a6%ac%ed%9e%88%eb%a1%9c-%ea%b3%a4%ec%95%bd%ec%a0%a4%eb%a6%ac-%ed%8c%8c%ec%9a%b0%ec%b9%98-%ec%b9%bc%eb%a1%9c%eb%a6%ac%ec%a0%9c%eb%a1%9c%eb%b0%b1/"}
+  ]
 }

+ 24 - 0
test/main.go

@@ -0,0 +1,24 @@
+package main
+
+import (
+	"fmt"
+	"regexp"
+)
+
+// main is a scratch check of the WordPress theme-name pattern: it runs the
+// regexp over a sample HTML snippet and prints the captured theme
+// directory, or a fallback message when the pattern does not match.
+func main() {
+	const sample = `aaaaaaaa
+<link rel="stylesheet" href="https://example.com/wp-content/themes/my-theme/style.css" type="text/css" media="all">
+aaaaaaaaaaa`
+
+	themePattern := regexp.MustCompile(`wp-content\/themes\/(.+?)\/`)
+	match := themePattern.FindStringSubmatch(sample)
+	// match[0] is the whole match; match[1] is the captured theme directory.
+	if len(match) <= 1 {
+		fmt.Println("Theme name not found")
+		return
+	}
+	fmt.Println(match[1])
+}