mirror of
				https://github.com/nkanaev/yarr.git
				synced 2025-10-29 22:29:59 +00:00 
			
		
		
		
	find favicons
This commit is contained in:
		| @@ -14,6 +14,8 @@ func FindFeeds(body string, base string) map[string]string { | ||||
| 		return candidates | ||||
| 	} | ||||
|  | ||||
| 	// find direct links | ||||
| 	// css: link[type=application/atom+xml] | ||||
| 	linkTypes := []string{"application/atom+xml", "application/rss+xml", "application/json"} | ||||
| 	isFeedLink := func(n *html.Node) bool { | ||||
| 		if n.Type == html.ElementNode && n.Data == "link" { | ||||
| @@ -35,29 +37,21 @@ func FindFeeds(body string, base string) map[string]string { | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// guess by hyperlink properties | ||||
| 	if len(candidates) == 0 { | ||||
| 		// guess by hyperlink properties: | ||||
| 		// - a[href="feed"] | ||||
| 		// - a:contains("rss") | ||||
| 		// ...etc | ||||
| 		// css: a[href="feed"] | ||||
| 		// css: a:contains("rss") | ||||
| 		feedHrefs := []string{"feed", "feed.xml", "rss.xml", "atom.xml"} | ||||
| 		feedTexts := []string{"rss", "feed"} | ||||
| 		isFeedHyperLink := func(n *html.Node) bool { | ||||
| 			if n.Type == html.ElementNode && n.Data == "a" { | ||||
| 				href := strings.Trim(getAttr(n, "href"), "/") | ||||
| 				text := getText(n) | ||||
|  | ||||
| 				for _, feedHref := range feedHrefs { | ||||
| 					if strings.HasSuffix(href, feedHref) { | ||||
| 				if any(feedHrefs, strings.Trim(getAttr(n, "href"), "/"), strings.HasSuffix) { | ||||
| 					return true | ||||
| 				} | ||||
| 				} | ||||
| 				for _, feedText := range feedTexts { | ||||
| 					if strings.EqualFold(text, feedText) { | ||||
| 				if any(feedTexts, getText(n), strings.EqualFold) { | ||||
| 					return true | ||||
| 				} | ||||
| 			} | ||||
| 			} | ||||
| 			return false | ||||
| 		} | ||||
| 		for _, node := range getNodes(doc, isFeedHyperLink) { | ||||
| @@ -71,3 +65,23 @@ func FindFeeds(body string, base string) map[string]string { | ||||
|  | ||||
| 	return candidates | ||||
| } | ||||
|  | ||||
| func FindIcons(body string, base string) []string { | ||||
| 	icons := make([]string, 0) | ||||
|  | ||||
| 	doc, err := html.Parse(strings.NewReader(body)) | ||||
| 	if err != nil { | ||||
| 		return icons | ||||
| 	} | ||||
|  | ||||
| 	// css: link[rel=icon] | ||||
| 	isLink := func(n *html.Node) bool { | ||||
| 		return n.Type == html.ElementNode && n.Data == "link" | ||||
| 	} | ||||
| 	for _, node := range getNodes(doc, isLink) { | ||||
| 		if any(strings.Split(getAttr(node, "rel"), " "), "icon", strings.EqualFold) { | ||||
| 			icons = append(icons, absoluteUrl(getAttr(node, "href"), base)) | ||||
| 		} | ||||
| 	} | ||||
| 	return icons | ||||
| } | ||||
|   | ||||
| @@ -46,7 +46,7 @@ func TestFindFeedsLinks(t *testing.T) { | ||||
| } | ||||
|  | ||||
| func TestFindFeedsGuess(t *testing.T) { | ||||
| 	x := ` | ||||
| 	body := ` | ||||
| 		<!DOCTYPE html> | ||||
| 		<html lang="en"> | ||||
| 		<body> | ||||
| @@ -60,15 +60,38 @@ func TestFindFeedsGuess(t *testing.T) { | ||||
| 		</body> | ||||
| 		</html> | ||||
| 	` | ||||
| 	r := FindFeeds(x, base) | ||||
|  | ||||
| 	e := map[string]string{ | ||||
| 	have := FindFeeds(body, base) | ||||
| 	want := map[string]string{ | ||||
| 		base + "/feed.xml": "", | ||||
| 		base + "/news": "", | ||||
| 	} | ||||
| 	if !reflect.DeepEqual(e, r) { | ||||
| 		t.Logf("want: %#v", e) | ||||
| 		t.Logf("have: %#v", r) | ||||
| 	if !reflect.DeepEqual(want, have) { | ||||
| 		t.Logf("want: %#v", want) | ||||
| 		t.Logf("have: %#v", have) | ||||
| 		t.Fatal("invalid result") | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestFindIcons(t *testing.T) { | ||||
| 	body := ` | ||||
| 		<!DOCTYPE html> | ||||
| 		<html lang="en"> | ||||
| 		<head> | ||||
| 			<meta charset="UTF-8"> | ||||
| 			<title></title> | ||||
| 			<link rel="icon favicon" href="/favicon.ico"> | ||||
| 			<link rel="icon macicon" href="path/to/favicon.png"> | ||||
| 		</head> | ||||
| 		<body> | ||||
| 			 | ||||
| 		</body> | ||||
| 		</html> | ||||
| 	` | ||||
| 	have := FindIcons(body, base) | ||||
| 	want := []string{base + "/favicon.ico", base + "/path/to/favicon.png"} | ||||
| 	if !reflect.DeepEqual(have, want) { | ||||
| 		t.Logf("want: %#v", want) | ||||
| 		t.Logf("have: %#v", have) | ||||
| 		t.Fatal("invalid result") | ||||
| 	} | ||||
| } | ||||
|   | ||||
| @@ -7,6 +7,15 @@ import ( | ||||
| 	"golang.org/x/net/html" | ||||
| ) | ||||
|  | ||||
// any reports whether match(el, x) holds for at least one x in els.
//
// el is always the first argument to match and the candidate from els the
// second, mirroring the (s, pattern) parameter order of the strings package
// predicates. With strings.HasSuffix the call therefore asks "does el end
// with one of els?", and with strings.EqualFold "is el equal to one of els,
// ignoring case?". Passing the arguments the other way round would make
// asymmetric predicates test the candidate against el instead (and, for
// HasSuffix, make an empty el match every candidate).
//
// It returns false when els is empty.
func any(els []string, el string, match func(string, string) bool) bool {
	for _, x := range els {
		if match(el, x) {
			return true
		}
	}
	return false
}
|  | ||||
| func getAttr(node *html.Node, key string) string { | ||||
| 	for _, a := range node.Attr { | ||||
| 		if a.Key == key { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user