mirror of
https://github.com/nkanaev/yarr.git
synced 2025-05-24 00:33:14 +00:00
find favicons
This commit is contained in:
parent
1f042a8434
commit
c896440525
@ -14,6 +14,8 @@ func FindFeeds(body string, base string) map[string]string {
|
|||||||
return candidates
|
return candidates
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// find direct links
|
||||||
|
// css: link[type=application/atom+xml]
|
||||||
linkTypes := []string{"application/atom+xml", "application/rss+xml", "application/json"}
|
linkTypes := []string{"application/atom+xml", "application/rss+xml", "application/json"}
|
||||||
isFeedLink := func(n *html.Node) bool {
|
isFeedLink := func(n *html.Node) bool {
|
||||||
if n.Type == html.ElementNode && n.Data == "link" {
|
if n.Type == html.ElementNode && n.Data == "link" {
|
||||||
@ -35,27 +37,19 @@ func FindFeeds(body string, base string) map[string]string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// guess by hyperlink properties
|
||||||
if len(candidates) == 0 {
|
if len(candidates) == 0 {
|
||||||
// guess by hyperlink properties:
|
// css: a[href="feed"]
|
||||||
// - a[href="feed"]
|
// css: a:contains("rss")
|
||||||
// - a:contains("rss")
|
|
||||||
// ...etc
|
|
||||||
feedHrefs := []string{"feed", "feed.xml", "rss.xml", "atom.xml"}
|
feedHrefs := []string{"feed", "feed.xml", "rss.xml", "atom.xml"}
|
||||||
feedTexts := []string{"rss", "feed"}
|
feedTexts := []string{"rss", "feed"}
|
||||||
isFeedHyperLink := func(n *html.Node) bool {
|
isFeedHyperLink := func(n *html.Node) bool {
|
||||||
if n.Type == html.ElementNode && n.Data == "a" {
|
if n.Type == html.ElementNode && n.Data == "a" {
|
||||||
href := strings.Trim(getAttr(n, "href"), "/")
|
if any(feedHrefs, strings.Trim(getAttr(n, "href"), "/"), strings.HasSuffix) {
|
||||||
text := getText(n)
|
return true
|
||||||
|
|
||||||
for _, feedHref := range feedHrefs {
|
|
||||||
if strings.HasSuffix(href, feedHref) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
for _, feedText := range feedTexts {
|
if any(feedTexts, getText(n), strings.EqualFold) {
|
||||||
if strings.EqualFold(text, feedText) {
|
return true
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
@ -71,3 +65,23 @@ func FindFeeds(body string, base string) map[string]string {
|
|||||||
|
|
||||||
return candidates
|
return candidates
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func FindIcons(body string, base string) []string {
|
||||||
|
icons := make([]string, 0)
|
||||||
|
|
||||||
|
doc, err := html.Parse(strings.NewReader(body))
|
||||||
|
if err != nil {
|
||||||
|
return icons
|
||||||
|
}
|
||||||
|
|
||||||
|
// css: link[rel=icon]
|
||||||
|
isLink := func(n *html.Node) bool {
|
||||||
|
return n.Type == html.ElementNode && n.Data == "link"
|
||||||
|
}
|
||||||
|
for _, node := range getNodes(doc, isLink) {
|
||||||
|
if any(strings.Split(getAttr(node, "rel"), " "), "icon", strings.EqualFold) {
|
||||||
|
icons = append(icons, absoluteUrl(getAttr(node, "href"), base))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return icons
|
||||||
|
}
|
||||||
|
@ -46,7 +46,7 @@ func TestFindFeedsLinks(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestFindFeedsGuess(t *testing.T) {
|
func TestFindFeedsGuess(t *testing.T) {
|
||||||
x := `
|
body := `
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<body>
|
<body>
|
||||||
@ -60,15 +60,38 @@ func TestFindFeedsGuess(t *testing.T) {
|
|||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
`
|
`
|
||||||
r := FindFeeds(x, base)
|
have := FindFeeds(body, base)
|
||||||
|
want := map[string]string{
|
||||||
e := map[string]string{
|
|
||||||
base + "/feed.xml": "",
|
base + "/feed.xml": "",
|
||||||
base + "/news": "",
|
base + "/news": "",
|
||||||
}
|
}
|
||||||
if !reflect.DeepEqual(e, r) {
|
if !reflect.DeepEqual(want, have) {
|
||||||
t.Logf("want: %#v", e)
|
t.Logf("want: %#v", want)
|
||||||
t.Logf("have: %#v", r)
|
t.Logf("have: %#v", have)
|
||||||
|
t.Fatal("invalid result")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFindIcons(t *testing.T) {
|
||||||
|
body := `
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title></title>
|
||||||
|
<link rel="icon favicon" href="/favicon.ico">
|
||||||
|
<link rel="icon macicon" href="path/to/favicon.png">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
`
|
||||||
|
have := FindIcons(body, base)
|
||||||
|
want := []string{base + "/favicon.ico", base + "/path/to/favicon.png"}
|
||||||
|
if !reflect.DeepEqual(have, want) {
|
||||||
|
t.Logf("want: %#v", want)
|
||||||
|
t.Logf("have: %#v", have)
|
||||||
t.Fatal("invalid result")
|
t.Fatal("invalid result")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -7,6 +7,15 @@ import (
|
|||||||
"golang.org/x/net/html"
|
"golang.org/x/net/html"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// any reports whether match(el, x) is true for at least one x in els.
//
// el is always passed as the first argument of match and the candidate from
// els as the second, so asymmetric matchers read naturally at the call site:
// any(suffixes, s, strings.HasSuffix) asks "does s end with one of suffixes?".
// It returns false for an empty or nil els.
//
// NOTE: within this package the name shadows Go's predeclared identifier any.
func any(els []string, el string, match func(string, string) bool) bool {
	for _, x := range els {
		if match(el, x) {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
func getAttr(node *html.Node, key string) string {
|
func getAttr(node *html.Node, key string) string {
|
||||||
for _, a := range node.Attr {
|
for _, a := range node.Attr {
|
||||||
if a.Key == key {
|
if a.Key == key {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user