mirror of
				https://github.com/nkanaev/yarr.git
				synced 2025-10-26 05:01:51 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			196 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			196 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package server
 | |
| 
 | |
| import (
 | |
| 	"bytes"
 | |
| 	"errors"
 | |
| 	"fmt"
 | |
| 	"github.com/PuerkitoBio/goquery"
 | |
| 	"github.com/mmcdole/gofeed"
 | |
| 	"github.com/nkanaev/yarr/storage"
 | |
| 	"io/ioutil"
 | |
| 	"net/http"
 | |
| 	"net/url"
 | |
| )
 | |
| 
 | |
// FeedSource describes one candidate feed link found on a web page.
// It is encoded to JSON under the keys "title" and "url".
type FeedSource struct {
	Title string `json:"title"` // value of the link's title attribute; may be empty
	Url   string `json:"url"`   // address of the candidate feed
}
 | |
| 
 | |
// feedLinks is the CSS selector list used to spot elements that may point to
// a feed: <link> tags with an RSS/Atom MIME type, anchors whose href ends in a
// common feed path, and anchors whose text contains "rss"/"feed" (lower- or
// upper-case).
const feedLinks = `
	link[type='application/rss+xml'],
	link[type='application/atom+xml'],
	a[href$="/feed"],
	a[href$="/feed/"],
	a[href$="feed.xml"],
	a[href$="atom.xml"],
	a[href$="rss.xml"],
	a:contains("rss"),
	a:contains("RSS"),
	a:contains("feed"),
	a:contains("FEED")
`
 | |
| 
 | |
| func searchFeedLinks(html []byte, siteurl string) ([]FeedSource, error) {
 | |
| 	sources := make([]FeedSource, 0, 0)
 | |
| 
 | |
| 	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(html))
 | |
| 	if err != nil {
 | |
| 		return sources, err
 | |
| 	}
 | |
| 	base, err := url.Parse(siteurl)
 | |
| 	if err != nil {
 | |
| 		return sources, err
 | |
| 	}
 | |
| 
 | |
| 	doc.Find(feedLinks).Each(func(i int, s *goquery.Selection) {
 | |
| 		if href, ok := s.Attr("href"); ok {
 | |
| 			feedUrl, err := url.Parse(href)
 | |
| 			if err != nil {
 | |
| 				return
 | |
| 			}
 | |
| 			title := s.AttrOr("title", "")
 | |
| 			url := base.ResolveReference(feedUrl).String()
 | |
| 			sources = append(sources, FeedSource{Title: title, Url: url})
 | |
| 		}
 | |
| 	})
 | |
| 	return sources, nil
 | |
| }
 | |
| 
 | |
| func discoverFeed(url, userAgent string) (*gofeed.Feed, *[]FeedSource, error) {
 | |
| 	// Query URL
 | |
| 	feedreq, _ := http.NewRequest("GET", url, nil)
 | |
| 	feedreq.Header.Set("user-agent", userAgent)
 | |
| 	feedclient := &http.Client{}
 | |
| 	res, err := feedclient.Do(feedreq)
 | |
| 	if err != nil {
 | |
| 		return nil, nil, err
 | |
| 	} else if res.StatusCode != 200 {
 | |
| 		errmsg := fmt.Sprintf("Failed to fetch feed %s (status: %d)", url, res.StatusCode)
 | |
| 		return nil, nil, errors.New(errmsg)
 | |
| 	}
 | |
| 	content, err := ioutil.ReadAll(res.Body)
 | |
| 	if err != nil {
 | |
| 		return nil, nil, err
 | |
| 	}
 | |
| 
 | |
| 	// Try to feed into parser
 | |
| 	feedparser := gofeed.NewParser()
 | |
| 	feed, err := feedparser.Parse(bytes.NewReader(content))
 | |
| 	if err == nil {
 | |
| 		// WILD: some feeds do not have link to itself
 | |
| 		if len(feed.FeedLink) == 0 {
 | |
| 			feed.FeedLink = url
 | |
| 		}
 | |
| 		return feed, nil, nil
 | |
| 	}
 | |
| 
 | |
| 	// Possibly an html link. Search for feed links
 | |
| 	sources, err := searchFeedLinks(content, url)
 | |
| 	if err != nil {
 | |
| 		return nil, nil, err
 | |
| 	} else if len(sources) == 0 {
 | |
| 		return nil, nil, errors.New("No feeds found at the given url")
 | |
| 	} else if len(sources) == 1 {
 | |
| 		if sources[0].Url == url {
 | |
| 			return nil, nil, errors.New("Recursion!")
 | |
| 		}
 | |
| 		return discoverFeed(sources[0].Url, userAgent)
 | |
| 	}
 | |
| 	return nil, &sources, nil
 | |
| }
 | |
| 
 | |
| func findFavicon(websiteUrl, feedUrl string) (*[]byte, error) {
 | |
| 	candidateUrls := make([]string, 0)
 | |
| 
 | |
| 	favicon := func(link string) string {
 | |
| 		u, err := url.Parse(link)
 | |
| 		if err != nil {
 | |
| 			return ""
 | |
| 		}
 | |
| 		return fmt.Sprintf("%s://%s/favicon.ico", u.Scheme, u.Host)
 | |
| 	}
 | |
| 
 | |
| 	if len(websiteUrl) != 0 {
 | |
| 		doc, err := goquery.NewDocument(websiteUrl)
 | |
| 		if err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 		doc.Find(`link[rel=icon]`).EachWithBreak(func(i int, s *goquery.Selection) bool {
 | |
| 			if href, ok := s.Attr("href"); ok {
 | |
| 				if hrefUrl, err := url.Parse(href); err == nil {
 | |
| 					faviconUrl := doc.Url.ResolveReference(hrefUrl).String()
 | |
| 					candidateUrls = append(candidateUrls, faviconUrl)
 | |
| 				}
 | |
| 			}
 | |
| 			return true
 | |
| 		})
 | |
| 
 | |
| 		if c := favicon(websiteUrl); len(c) != 0 {
 | |
| 			candidateUrls = append(candidateUrls, c)
 | |
| 		}
 | |
| 	}
 | |
| 	if c := favicon(feedUrl); len(c) != 0 {
 | |
| 		candidateUrls = append(candidateUrls, c)
 | |
| 	}
 | |
| 
 | |
| 	client := http.Client{}
 | |
| 
 | |
| 	imageTypes := [4]string{
 | |
| 		"image/x-icon",
 | |
| 		"image/png",
 | |
| 		"image/jpeg",
 | |
| 		"image/gif",
 | |
| 	}
 | |
| 	for _, url := range candidateUrls {
 | |
| 		if res, err := client.Get(url); err == nil && res.StatusCode == 200 {
 | |
| 			if content, err := ioutil.ReadAll(res.Body); err == nil {
 | |
| 				ctype := http.DetectContentType(content)
 | |
| 				for _, itype := range imageTypes {
 | |
| 					if ctype == itype {
 | |
| 						return &content, nil
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	return nil, nil
 | |
| }
 | |
| 
 | |
| func convertItems(items []*gofeed.Item, feed storage.Feed) []storage.Item {
 | |
| 	result := make([]storage.Item, len(items))
 | |
| 	for i, item := range items {
 | |
| 		imageURL := ""
 | |
| 		if item.Image != nil {
 | |
| 			imageURL = item.Image.URL
 | |
| 		}
 | |
| 		author := ""
 | |
| 		if item.Author != nil {
 | |
| 			author = item.Author.Name
 | |
| 		}
 | |
| 		result[i] = storage.Item{
 | |
| 			GUID:        item.GUID,
 | |
| 			FeedId:      feed.Id,
 | |
| 			Title:       item.Title,
 | |
| 			Link:        item.Link,
 | |
| 			Description: item.Description,
 | |
| 			Content:     item.Content,
 | |
| 			Author:      author,
 | |
| 			Date:        item.PublishedParsed,
 | |
| 			DateUpdated: item.UpdatedParsed,
 | |
| 			Status:      storage.UNREAD,
 | |
| 			Image:       imageURL,
 | |
| 		}
 | |
| 	}
 | |
| 	return result
 | |
| }
 | |
| 
 | |
| func listItems(f storage.Feed) ([]storage.Item, error) {
 | |
| 	fp := gofeed.NewParser()
 | |
| 	feed, err := fp.ParseURL(f.FeedLink)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 	return convertItems(feed.Items, f), nil
 | |
| }
 |