mirror of
https://github.com/nkanaev/yarr.git
synced 2025-05-24 00:33:14 +00:00
rewrite icon crawling
This commit is contained in:
parent
f38dcfba3b
commit
851aa1a136
@ -76,10 +76,10 @@ func (s *Storage) UpdateFeedIcon(feedId int64, icon *[]byte) bool {
|
||||
}
|
||||
|
||||
func (s *Storage) ListFeeds() []Feed {
|
||||
result := make([]Feed, 0, 0)
|
||||
result := make([]Feed, 0)
|
||||
rows, err := s.db.Query(`
|
||||
select id, folder_id, title, description, link, feed_link,
|
||||
ifnull(icon, '') != '' as has_icon
|
||||
ifnull(length(icon), 0) > 0 as has_icon
|
||||
from feeds
|
||||
order by title collate nocase
|
||||
`)
|
||||
@ -107,6 +107,36 @@ func (s *Storage) ListFeeds() []Feed {
|
||||
return result
|
||||
}
|
||||
|
||||
func (s *Storage) ListFeedsMissingIcons() []Feed {
|
||||
result := make([]Feed, 0)
|
||||
rows, err := s.db.Query(`
|
||||
select id, folder_id, title, description, link, feed_link
|
||||
from feeds
|
||||
where icon is null
|
||||
`)
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
return result
|
||||
}
|
||||
for rows.Next() {
|
||||
var f Feed
|
||||
err = rows.Scan(
|
||||
&f.Id,
|
||||
&f.FolderId,
|
||||
&f.Title,
|
||||
&f.Description,
|
||||
&f.Link,
|
||||
&f.FeedLink,
|
||||
)
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
return result
|
||||
}
|
||||
result = append(result, f)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (s *Storage) GetFeed(id int64) *Feed {
|
||||
var f Feed
|
||||
err := s.db.QueryRow(`
|
||||
|
@ -76,8 +76,16 @@ func DiscoverFeed(candidateUrl string) (*DiscoverResult, error) {
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func findFavicon(websiteUrl, feedUrl string) (*[]byte, error) {
|
||||
candidateUrls := make([]string, 0)
|
||||
var emptyIcon = make([]byte, 0)
|
||||
var imageTypes = map[string]bool{
|
||||
"image/x-icon": true,
|
||||
"image/png": true,
|
||||
"image/jpeg": true,
|
||||
"image/gif": true,
|
||||
}
|
||||
|
||||
func findFavicon(siteUrl, feedUrl string) (*[]byte, error) {
|
||||
urls := make([]string, 0)
|
||||
|
||||
favicon := func(link string) string {
|
||||
u, err := url.Parse(link)
|
||||
@ -87,49 +95,43 @@ func findFavicon(websiteUrl, feedUrl string) (*[]byte, error) {
|
||||
return fmt.Sprintf("%s://%s/favicon.ico", u.Scheme, u.Host)
|
||||
}
|
||||
|
||||
if len(websiteUrl) != 0 {
|
||||
res, err := client.get(websiteUrl)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
body, err := ioutil.ReadAll(res.Body)
|
||||
if siteUrl != "" {
|
||||
if res, err := client.get(siteUrl); err == nil {
|
||||
defer res.Body.Close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
candidateUrls = append(candidateUrls, scraper.FindIcons(string(body), websiteUrl)...)
|
||||
if c := favicon(websiteUrl); len(c) != 0 {
|
||||
candidateUrls = append(candidateUrls, c)
|
||||
if body, err := ioutil.ReadAll(res.Body); err == nil {
|
||||
urls = append(urls, scraper.FindIcons(string(body), siteUrl)...)
|
||||
if c := favicon(siteUrl); c != "" {
|
||||
urls = append(urls, c)
|
||||
}
|
||||
}
|
||||
}
|
||||
if c := favicon(feedUrl); len(c) != 0 {
|
||||
candidateUrls = append(candidateUrls, c)
|
||||
}
|
||||
|
||||
imageTypes := [4]string{
|
||||
"image/x-icon",
|
||||
"image/png",
|
||||
"image/jpeg",
|
||||
"image/gif",
|
||||
if c := favicon(feedUrl); c != "" {
|
||||
urls = append(urls, c)
|
||||
}
|
||||
for _, url := range candidateUrls {
|
||||
res, err := client.get(url)
|
||||
|
||||
for _, u := range urls {
|
||||
res, err := client.get(u)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode == 200 {
|
||||
if content, err := ioutil.ReadAll(res.Body); err == nil {
|
||||
if res.StatusCode != 200 {
|
||||
continue
|
||||
}
|
||||
|
||||
content, err := ioutil.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
ctype := http.DetectContentType(content)
|
||||
for _, itype := range imageTypes {
|
||||
if ctype == itype {
|
||||
if imageTypes[ctype] {
|
||||
return &content, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
return &emptyIcon, nil
|
||||
}
|
||||
|
||||
func ConvertItems(items []parser.Item, feed storage.Feed) []storage.Item {
|
||||
|
@ -41,11 +41,9 @@ func (w *Worker) StartFeedCleaner() {
|
||||
|
||||
func (w *Worker) FindFavicons() {
|
||||
go func() {
|
||||
for _, feed := range w.db.ListFeeds() {
|
||||
if !feed.HasIcon {
|
||||
for _, feed := range w.db.ListFeedsMissingIcons() {
|
||||
w.FindFeedFavicon(feed)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user