mirror of
https://github.com/nkanaev/yarr.git
synced 2025-05-24 00:33:14 +00:00
handle google url redirect in page crawler
This commit is contained in:
parent
b935a1c511
commit
698f5d6d06
@ -105,7 +105,7 @@
|
|||||||
return api('post', './logout')
|
return api('post', './logout')
|
||||||
},
|
},
|
||||||
crawl: function(url) {
|
crawl: function(url) {
|
||||||
return api('get', './page?url=' + url).then(json)
|
return api('get', './page?url=' + encodeURIComponent(url)).then(json)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})()
|
})()
|
||||||
|
17
src/content/silo/url.go
Normal file
17
src/content/silo/url.go
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
package silo
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/url"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// RedirectURL unwraps Google redirect links such as
// "https://www.google.com/url?...&url=<target>" and returns the target stored
// in the "url" query parameter.
//
// A link is treated as a Google redirect when its scheme is http or https,
// its host is "www.google.com" or "google.com" (compared case-insensitively)
// and its path is exactly "/url". Any other link, a link that cannot be
// parsed, or a redirect link without a non-empty "url" parameter is returned
// unchanged.
func RedirectURL(link string) string {
	u, err := url.Parse(link)
	if err != nil {
		return link
	}
	// url.Parse lowercases the scheme, so a direct comparison is sufficient.
	if u.Scheme != "http" && u.Scheme != "https" {
		return link
	}
	// The host is not normalized by url.Parse; lowercase it before comparing.
	host := strings.ToLower(u.Hostname())
	if host != "www.google.com" && host != "google.com" {
		return link
	}
	if u.Path != "/url" {
		return link
	}
	if target := u.Query().Get("url"); target != "" {
		return target
	}
	return link
}
|
24
src/content/silo/url_test.go
Normal file
24
src/content/silo/url_test.go
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
package silo
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func TestRedirectURL(t *testing.T) {
|
||||||
|
link := "https://www.google.com/url?rct=j&sa=t&url=https://www.cryptoglobe.com/latest/2022/08/investment-strategist-lyn-alden-explains-why-she-is-still-bullish-on-bitcoin-long-term/&ct=ga&cd=CAIyGjlkMjI1NjUyODE3ODFjMDQ6Y29tOmVuOlVT&usg=AOvVaw16C2fJtw6m8QVEbto2HCKK"
|
||||||
|
want := "https://www.cryptoglobe.com/latest/2022/08/investment-strategist-lyn-alden-explains-why-she-is-still-bullish-on-bitcoin-long-term/"
|
||||||
|
have := RedirectURL(link)
|
||||||
|
if have != want {
|
||||||
|
t.Logf("want: %s", want)
|
||||||
|
t.Logf("have: %s", have)
|
||||||
|
t.Fail()
|
||||||
|
}
|
||||||
|
|
||||||
|
link = "https://example.com"
|
||||||
|
if RedirectURL(link) != link {
|
||||||
|
t.Fail()
|
||||||
|
}
|
||||||
|
|
||||||
|
link = "https://example.com/url?url=test.com"
|
||||||
|
if RedirectURL(link) != link {
|
||||||
|
t.Fail()
|
||||||
|
}
|
||||||
|
}
|
@ -456,6 +456,9 @@ func (s *Server) handleOPMLExport(c *router.Context) {
|
|||||||
func (s *Server) handlePageCrawl(c *router.Context) {
|
func (s *Server) handlePageCrawl(c *router.Context) {
|
||||||
url := c.Req.URL.Query().Get("url")
|
url := c.Req.URL.Query().Get("url")
|
||||||
|
|
||||||
|
if newUrl := silo.RedirectURL(url); newUrl != "" {
|
||||||
|
url = newUrl
|
||||||
|
}
|
||||||
if content := silo.VideoIFrame(url); content != "" {
|
if content := silo.VideoIFrame(url); content != "" {
|
||||||
c.JSON(http.StatusOK, map[string]string{
|
c.JSON(http.StatusOK, map[string]string{
|
||||||
"content": sanitizer.Sanitize(url, content),
|
"content": sanitizer.Sanitize(url, content),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user