drop direct goquery dependency

Nazar Kanaev 2021-03-18 11:51:18 +00:00
parent c896440525
commit e0c4752bbf
2 changed files with 4 additions and 55 deletions

go.mod

@@ -3,7 +3,6 @@ module github.com/nkanaev/yarr
 go 1.16

 require (
-	github.com/PuerkitoBio/goquery v1.5.1
 	github.com/mattn/go-sqlite3 v1.14.0
 	github.com/mmcdole/gofeed v1.0.0
 	golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e
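Note that this only makes goquery indirect rather than removing it from the build entirely: gofeed still appears to pull it in transitively, which is presumably why the commit title says "direct". Running go mod tidy after deleting the import below would keep go.mod and go.sum consistent.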


@@ -4,9 +4,9 @@ import (
 	"bytes"
 	"errors"
 	"fmt"
-	"github.com/PuerkitoBio/goquery"
 	"github.com/mmcdole/gofeed"
 	"github.com/nkanaev/yarr/src/storage"
+	"github.com/nkanaev/yarr/src/crawler"
 	"io/ioutil"
 	"net"
 	"net/http"
@@ -63,44 +63,7 @@ var defaultClient *Client
 func searchFeedLinks(html []byte, siteurl string) ([]FeedSource, error) {
 	sources := make([]FeedSource, 0, 0)
-	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(html))
-	if err != nil {
-		return sources, err
-	}
-	base, err := url.Parse(siteurl)
-	if err != nil {
-		return sources, err
-	}
-	// feed {url: title} map
-	feeds := make(map[string]string)
-	doc.Find(feedLinks).Each(func(i int, s *goquery.Selection) {
-		// Unlikely to happen, but don't get more than N links
-		if len(feeds) > 10 {
-			return
-		}
-		if href, ok := s.Attr("href"); ok {
-			feedUrl, err := url.Parse(href)
-			if err != nil {
-				return
-			}
-			title := s.AttrOr("title", "")
-			url := base.ResolveReference(feedUrl).String()
-			if _, alreadyExists := feeds[url]; alreadyExists {
-				if feeds[url] == "" {
-					feeds[url] = title
-				}
-			} else {
-				feeds[url] = title
-			}
-		}
-	})
-	for url, title := range feeds {
+	for url, title := range crawler.FindFeeds(string(html), siteurl) {
 		sources = append(sources, FeedSource{Title: title, Url: url})
 	}
 	return sources, nil
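The replacement, crawler.FindFeeds, is not part of this diff; from the call site it takes the page HTML and the site URL and returns a feed url → title map, absorbing the link matching, URL resolution, and deduplication removed above. A minimal sketch of what such a helper could look like, assuming it is built on golang.org/x/net/html (already a module dependency) and matches <link rel="alternate"> tags with feed MIME types — the actual src/crawler implementation may differ:

```go
package crawler

import (
	"net/url"
	"strings"

	"golang.org/x/net/html"
)

// FindFeeds returns a map of feed URL -> feed title discovered in a page.
// Sketch: walk the parsed tree, match <link rel="alternate"> tags with
// RSS/Atom MIME types, and resolve each href against the page URL.
func FindFeeds(body string, base string) map[string]string {
	feeds := make(map[string]string)
	baseUrl, err := url.Parse(base)
	if err != nil {
		return feeds
	}
	doc, err := html.Parse(strings.NewReader(body))
	if err != nil {
		return feeds
	}
	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "link" {
			var rel, typ, href, title string
			for _, a := range n.Attr {
				switch a.Key {
				case "rel":
					rel = a.Val
				case "type":
					typ = a.Val
				case "href":
					href = a.Val
				case "title":
					title = a.Val
				}
			}
			isFeed := typ == "application/rss+xml" || typ == "application/atom+xml"
			if rel == "alternate" && isFeed && href != "" {
				if hrefUrl, err := url.Parse(href); err == nil {
					feeds[baseUrl.ResolveReference(hrefUrl).String()] = title
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)
	return feeds
}
```

Because the map is keyed by the resolved URL, duplicate links collapse for free, which is why the explicit alreadyExists bookkeeping could be dropped at the call site.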
@@ -170,29 +133,16 @@ func FindFavicon(websiteUrl, feedUrl string) (*[]byte, error) {
 	}
 	if len(websiteUrl) != 0 {
-		base, err := url.Parse(websiteUrl)
-		if err != nil {
-			return nil, err
-		}
 		res, err := defaultClient.get(websiteUrl)
 		if err != nil {
 			return nil, err
 		}
+		body, err := ioutil.ReadAll(res.Body)
 		defer res.Body.Close()
-		doc, err := goquery.NewDocumentFromReader(res.Body)
 		if err != nil {
 			return nil, err
 		}
-		doc.Find(`link[rel=icon]`).EachWithBreak(func(i int, s *goquery.Selection) bool {
-			if href, ok := s.Attr("href"); ok {
-				if hrefUrl, err := url.Parse(href); err == nil {
-					faviconUrl := base.ResolveReference(hrefUrl).String()
-					candidateUrls = append(candidateUrls, faviconUrl)
-				}
-			}
-			return true
-		})
+		candidateUrls = append(candidateUrls, crawler.FindIcons(string(body), websiteUrl)...)
 		if c := favicon(websiteUrl); len(c) != 0 {
 			candidateUrls = append(candidateUrls, c)
 		}
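crawler.FindIcons is likewise outside this diff; its call site implies it takes the page HTML and a base URL and returns a slice of resolved candidate icon URLs, replacing the goquery link[rel=icon] traversal. A sketch under the same x/net/html assumption, living in the same hypothetical package as FindFeeds above:

```go
package crawler

import (
	"net/url"
	"strings"

	"golang.org/x/net/html"
)

// FindIcons returns candidate favicon URLs found in <link rel="icon">-style
// tags, resolved against the page URL. Sketch only: the real package may
// match more rel variants ("shortcut icon", "apple-touch-icon", ...).
func FindIcons(body string, base string) []string {
	icons := make([]string, 0)
	baseUrl, err := url.Parse(base)
	if err != nil {
		return icons
	}
	doc, err := html.Parse(strings.NewReader(body))
	if err != nil {
		return icons
	}
	var walk func(*html.Node)
	walk = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "link" {
			var rel, href string
			for _, a := range n.Attr {
				switch a.Key {
				case "rel":
					rel = a.Val
				case "href":
					href = a.Val
				}
			}
			if strings.Contains(strings.ToLower(rel), "icon") && href != "" {
				if hrefUrl, err := url.Parse(href); err == nil {
					icons = append(icons, baseUrl.ResolveReference(hrefUrl).String())
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			walk(c)
		}
	}
	walk(doc)
	return icons
}
```

One behavioral difference worth noting: the old code streamed res.Body straight into goquery, while the new code reads the whole body into memory first (ioutil.ReadAll) so it can hand a string to the crawler package.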