mirror of
https://github.com/nkanaev/yarr.git
synced 2025-05-24 00:33:14 +00:00
rewrite icon crawling
This commit is contained in:
parent
f38dcfba3b
commit
851aa1a136
@ -76,10 +76,10 @@ func (s *Storage) UpdateFeedIcon(feedId int64, icon *[]byte) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *Storage) ListFeeds() []Feed {
|
func (s *Storage) ListFeeds() []Feed {
|
||||||
result := make([]Feed, 0, 0)
|
result := make([]Feed, 0)
|
||||||
rows, err := s.db.Query(`
|
rows, err := s.db.Query(`
|
||||||
select id, folder_id, title, description, link, feed_link,
|
select id, folder_id, title, description, link, feed_link,
|
||||||
ifnull(icon, '') != '' as has_icon
|
ifnull(length(icon), 0) > 0 as has_icon
|
||||||
from feeds
|
from feeds
|
||||||
order by title collate nocase
|
order by title collate nocase
|
||||||
`)
|
`)
|
||||||
@ -107,6 +107,36 @@ func (s *Storage) ListFeeds() []Feed {
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *Storage) ListFeedsMissingIcons() []Feed {
|
||||||
|
result := make([]Feed, 0)
|
||||||
|
rows, err := s.db.Query(`
|
||||||
|
select id, folder_id, title, description, link, feed_link
|
||||||
|
from feeds
|
||||||
|
where icon is null
|
||||||
|
`)
|
||||||
|
if err != nil {
|
||||||
|
log.Print(err)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
for rows.Next() {
|
||||||
|
var f Feed
|
||||||
|
err = rows.Scan(
|
||||||
|
&f.Id,
|
||||||
|
&f.FolderId,
|
||||||
|
&f.Title,
|
||||||
|
&f.Description,
|
||||||
|
&f.Link,
|
||||||
|
&f.FeedLink,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
log.Print(err)
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
result = append(result, f)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
func (s *Storage) GetFeed(id int64) *Feed {
|
func (s *Storage) GetFeed(id int64) *Feed {
|
||||||
var f Feed
|
var f Feed
|
||||||
err := s.db.QueryRow(`
|
err := s.db.QueryRow(`
|
||||||
|
@ -76,8 +76,16 @@ func DiscoverFeed(candidateUrl string) (*DiscoverResult, error) {
|
|||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func findFavicon(websiteUrl, feedUrl string) (*[]byte, error) {
|
var emptyIcon = make([]byte, 0)
|
||||||
candidateUrls := make([]string, 0)
|
var imageTypes = map[string]bool{
|
||||||
|
"image/x-icon": true,
|
||||||
|
"image/png": true,
|
||||||
|
"image/jpeg": true,
|
||||||
|
"image/gif": true,
|
||||||
|
}
|
||||||
|
|
||||||
|
func findFavicon(siteUrl, feedUrl string) (*[]byte, error) {
|
||||||
|
urls := make([]string, 0)
|
||||||
|
|
||||||
favicon := func(link string) string {
|
favicon := func(link string) string {
|
||||||
u, err := url.Parse(link)
|
u, err := url.Parse(link)
|
||||||
@ -87,49 +95,43 @@ func findFavicon(websiteUrl, feedUrl string) (*[]byte, error) {
|
|||||||
return fmt.Sprintf("%s://%s/favicon.ico", u.Scheme, u.Host)
|
return fmt.Sprintf("%s://%s/favicon.ico", u.Scheme, u.Host)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(websiteUrl) != 0 {
|
if siteUrl != "" {
|
||||||
res, err := client.get(websiteUrl)
|
if res, err := client.get(siteUrl); err == nil {
|
||||||
if err != nil {
|
defer res.Body.Close()
|
||||||
return nil, err
|
if body, err := ioutil.ReadAll(res.Body); err == nil {
|
||||||
}
|
urls = append(urls, scraper.FindIcons(string(body), siteUrl)...)
|
||||||
body, err := ioutil.ReadAll(res.Body)
|
if c := favicon(siteUrl); c != "" {
|
||||||
defer res.Body.Close()
|
urls = append(urls, c)
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
candidateUrls = append(candidateUrls, scraper.FindIcons(string(body), websiteUrl)...)
|
|
||||||
if c := favicon(websiteUrl); len(c) != 0 {
|
|
||||||
candidateUrls = append(candidateUrls, c)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if c := favicon(feedUrl); len(c) != 0 {
|
|
||||||
candidateUrls = append(candidateUrls, c)
|
|
||||||
}
|
|
||||||
|
|
||||||
imageTypes := [4]string{
|
|
||||||
"image/x-icon",
|
|
||||||
"image/png",
|
|
||||||
"image/jpeg",
|
|
||||||
"image/gif",
|
|
||||||
}
|
|
||||||
for _, url := range candidateUrls {
|
|
||||||
res, err := client.get(url)
|
|
||||||
if err != nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
defer res.Body.Close()
|
|
||||||
if res.StatusCode == 200 {
|
|
||||||
if content, err := ioutil.ReadAll(res.Body); err == nil {
|
|
||||||
ctype := http.DetectContentType(content)
|
|
||||||
for _, itype := range imageTypes {
|
|
||||||
if ctype == itype {
|
|
||||||
return &content, nil
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil, nil
|
|
||||||
|
if c := favicon(feedUrl); c != "" {
|
||||||
|
urls = append(urls, c)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, u := range urls {
|
||||||
|
res, err := client.get(u)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
defer res.Body.Close()
|
||||||
|
if res.StatusCode != 200 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
content, err := ioutil.ReadAll(res.Body)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
ctype := http.DetectContentType(content)
|
||||||
|
if imageTypes[ctype] {
|
||||||
|
return &content, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return &emptyIcon, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func ConvertItems(items []parser.Item, feed storage.Feed) []storage.Item {
|
func ConvertItems(items []parser.Item, feed storage.Feed) []storage.Item {
|
||||||
|
@ -41,10 +41,8 @@ func (w *Worker) StartFeedCleaner() {
|
|||||||
|
|
||||||
func (w *Worker) FindFavicons() {
|
func (w *Worker) FindFavicons() {
|
||||||
go func() {
|
go func() {
|
||||||
for _, feed := range w.db.ListFeeds() {
|
for _, feed := range w.db.ListFeedsMissingIcons() {
|
||||||
if !feed.HasIcon {
|
w.FindFeedFavicon(feed)
|
||||||
w.FindFeedFavicon(feed)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user