mirror of
https://github.com/nkanaev/yarr.git
synced 2025-10-30 14:33:31 +00:00
handle google url redirect in page crawler
This commit is contained in:
17
src/content/silo/url.go
Normal file
17
src/content/silo/url.go
Normal file
@@ -0,0 +1,17 @@
|
||||
package silo
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// RedirectURL unwraps a Google redirect link and returns the URL it points to.
//
// A link is treated as a Google redirect when it parses as an http or https
// URL whose host is www.google.com (compared case-insensitively) and whose
// path is exactly "/url". For such a link the decoded value of its "url"
// query parameter is returned. Every other link is returned unchanged,
// including links that fail to parse and redirect links with a missing or
// empty "url" parameter.
func RedirectURL(link string) string {
	u, err := url.Parse(link)
	if err != nil {
		return link
	}
	// url.Parse already lowercases the scheme, so a plain comparison is enough.
	if u.Scheme != "http" && u.Scheme != "https" {
		return link
	}
	// The host is kept as written in the link, so compare it case-insensitively.
	if !strings.EqualFold(u.Host, "www.google.com") || u.Path != "/url" {
		return link
	}
	if target := u.Query().Get("url"); target != "" {
		return target
	}
	return link
}
|
||||
24
src/content/silo/url_test.go
Normal file
24
src/content/silo/url_test.go
Normal file
@@ -0,0 +1,24 @@
|
||||
package silo
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestRedirectURL(t *testing.T) {
|
||||
link := "https://www.google.com/url?rct=j&sa=t&url=https://www.cryptoglobe.com/latest/2022/08/investment-strategist-lyn-alden-explains-why-she-is-still-bullish-on-bitcoin-long-term/&ct=ga&cd=CAIyGjlkMjI1NjUyODE3ODFjMDQ6Y29tOmVuOlVT&usg=AOvVaw16C2fJtw6m8QVEbto2HCKK"
|
||||
want := "https://www.cryptoglobe.com/latest/2022/08/investment-strategist-lyn-alden-explains-why-she-is-still-bullish-on-bitcoin-long-term/"
|
||||
have := RedirectURL(link)
|
||||
if have != want {
|
||||
t.Logf("want: %s", want)
|
||||
t.Logf("have: %s", have)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
link = "https://example.com"
|
||||
if RedirectURL(link) != link {
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
link = "https://example.com/url?url=test.com"
|
||||
if RedirectURL(link) != link {
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user