mirror of
https://github.com/nkanaev/yarr.git
synced 2025-05-24 00:33:14 +00:00
Handle Google URL redirects in the page crawler
This commit is contained in:
parent
b935a1c511
commit
698f5d6d06
@ -105,7 +105,7 @@
|
||||
return api('post', './logout')
|
||||
},
|
||||
crawl: function(url) {
|
||||
return api('get', './page?url=' + url).then(json)
|
||||
return api('get', './page?url=' + encodeURIComponent(url)).then(json)
|
||||
}
|
||||
}
|
||||
})()
|
||||
|
17
src/content/silo/url.go
Normal file
17
src/content/silo/url.go
Normal file
@ -0,0 +1,17 @@
|
||||
package silo
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// RedirectURL unwraps a Google redirect link.
//
// When link starts with "https://www.google.com/url?" and carries a non-empty
// "url" query parameter, the value of that parameter is returned. In every
// other case (different prefix, unparsable link, missing or empty parameter)
// link is returned unchanged.
func RedirectURL(link string) string {
	const googleRedirect = "https://www.google.com/url?"

	if !strings.HasPrefix(link, googleRedirect) {
		return link
	}

	parsed, err := url.Parse(link)
	if err != nil {
		return link
	}

	target := parsed.Query().Get("url")
	if target == "" {
		return link
	}
	return target
}
|
24
src/content/silo/url_test.go
Normal file
24
src/content/silo/url_test.go
Normal file
@ -0,0 +1,24 @@
|
||||
package silo
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestRedirectURL(t *testing.T) {
|
||||
link := "https://www.google.com/url?rct=j&sa=t&url=https://www.cryptoglobe.com/latest/2022/08/investment-strategist-lyn-alden-explains-why-she-is-still-bullish-on-bitcoin-long-term/&ct=ga&cd=CAIyGjlkMjI1NjUyODE3ODFjMDQ6Y29tOmVuOlVT&usg=AOvVaw16C2fJtw6m8QVEbto2HCKK"
|
||||
want := "https://www.cryptoglobe.com/latest/2022/08/investment-strategist-lyn-alden-explains-why-she-is-still-bullish-on-bitcoin-long-term/"
|
||||
have := RedirectURL(link)
|
||||
if have != want {
|
||||
t.Logf("want: %s", want)
|
||||
t.Logf("have: %s", have)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
link = "https://example.com"
|
||||
if RedirectURL(link) != link {
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
link = "https://example.com/url?url=test.com"
|
||||
if RedirectURL(link) != link {
|
||||
t.Fail()
|
||||
}
|
||||
}
|
@ -456,6 +456,9 @@ func (s *Server) handleOPMLExport(c *router.Context) {
|
||||
func (s *Server) handlePageCrawl(c *router.Context) {
|
||||
url := c.Req.URL.Query().Get("url")
|
||||
|
||||
if newUrl := silo.RedirectURL(url); newUrl != "" {
|
||||
url = newUrl
|
||||
}
|
||||
if content := silo.VideoIFrame(url); content != "" {
|
||||
c.JSON(http.StatusOK, map[string]string{
|
||||
"content": sanitizer.Sanitize(url, content),
|
||||
|
Loading…
x
Reference in New Issue
Block a user