mirror of
https://github.com/nkanaev/yarr.git
synced 2025-05-24 00:33:14 +00:00
drop direct goquery dependency
This commit is contained in:
parent
c896440525
commit
e0c4752bbf
1
go.mod
1
go.mod
@@ -3,7 +3,6 @@ module github.com/nkanaev/yarr
|
|||||||
go 1.16
|
go 1.16
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/PuerkitoBio/goquery v1.5.1
|
|
||||||
github.com/mattn/go-sqlite3 v1.14.0
|
github.com/mattn/go-sqlite3 v1.14.0
|
||||||
github.com/mmcdole/gofeed v1.0.0
|
github.com/mmcdole/gofeed v1.0.0
|
||||||
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e
|
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e
|
||||||
|
@@ -4,9 +4,9 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/PuerkitoBio/goquery"
|
|
||||||
"github.com/mmcdole/gofeed"
|
"github.com/mmcdole/gofeed"
|
||||||
"github.com/nkanaev/yarr/src/storage"
|
"github.com/nkanaev/yarr/src/storage"
|
||||||
|
"github.com/nkanaev/yarr/src/crawler"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
@@ -63,44 +63,7 @@ var defaultClient *Client
|
|||||||
|
|
||||||
func searchFeedLinks(html []byte, siteurl string) ([]FeedSource, error) {
|
func searchFeedLinks(html []byte, siteurl string) ([]FeedSource, error) {
|
||||||
sources := make([]FeedSource, 0, 0)
|
sources := make([]FeedSource, 0, 0)
|
||||||
|
for url, title := range crawler.FindFeeds(string(html), siteurl) {
|
||||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(html))
|
|
||||||
if err != nil {
|
|
||||||
return sources, err
|
|
||||||
}
|
|
||||||
base, err := url.Parse(siteurl)
|
|
||||||
if err != nil {
|
|
||||||
return sources, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// feed {url: title} map
|
|
||||||
feeds := make(map[string]string)
|
|
||||||
|
|
||||||
doc.Find(feedLinks).Each(func(i int, s *goquery.Selection) {
|
|
||||||
// Unlikely to happen, but don't get more than N links
|
|
||||||
if len(feeds) > 10 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if href, ok := s.Attr("href"); ok {
|
|
||||||
feedUrl, err := url.Parse(href)
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
title := s.AttrOr("title", "")
|
|
||||||
url := base.ResolveReference(feedUrl).String()
|
|
||||||
|
|
||||||
if _, alreadyExists := feeds[url]; alreadyExists {
|
|
||||||
if feeds[url] == "" {
|
|
||||||
feeds[url] = title
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
feeds[url] = title
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
for url, title := range feeds {
|
|
||||||
sources = append(sources, FeedSource{Title: title, Url: url})
|
sources = append(sources, FeedSource{Title: title, Url: url})
|
||||||
}
|
}
|
||||||
return sources, nil
|
return sources, nil
|
||||||
@@ -170,29 +133,16 @@ func FindFavicon(websiteUrl, feedUrl string) (*[]byte, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if len(websiteUrl) != 0 {
|
if len(websiteUrl) != 0 {
|
||||||
base, err := url.Parse(websiteUrl)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
res, err := defaultClient.get(websiteUrl)
|
res, err := defaultClient.get(websiteUrl)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
body, err := ioutil.ReadAll(res.Body)
|
||||||
defer res.Body.Close()
|
defer res.Body.Close()
|
||||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
doc.Find(`link[rel=icon]`).EachWithBreak(func(i int, s *goquery.Selection) bool {
|
candidateUrls = append(candidateUrls, crawler.FindIcons(string(body), websiteUrl)...)
|
||||||
if href, ok := s.Attr("href"); ok {
|
|
||||||
if hrefUrl, err := url.Parse(href); err == nil {
|
|
||||||
faviconUrl := base.ResolveReference(hrefUrl).String()
|
|
||||||
candidateUrls = append(candidateUrls, faviconUrl)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
})
|
|
||||||
|
|
||||||
if c := favicon(websiteUrl); len(c) != 0 {
|
if c := favicon(websiteUrl); len(c) != 0 {
|
||||||
candidateUrls = append(candidateUrls, c)
|
candidateUrls = append(candidateUrls, c)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user