switch to internal feed parser

This commit is contained in:
Nazar Kanaev 2021-03-23 10:49:25 +00:00
parent c91b439878
commit 5b36530f67
2 changed files with 37 additions and 35 deletions

View File

@ -155,7 +155,7 @@ func (s *Server) handleFeedList(c *router.Context) {
return
}
feed, sources, err := worker.DiscoverFeed(form.Url)
feed, feedUrl, sources, err := worker.DiscoverFeed(form.Url)
if err != nil {
log.Print(err)
c.JSON(http.StatusOK, map[string]string{"status": "notfound"})
@ -165,9 +165,9 @@ func (s *Server) handleFeedList(c *router.Context) {
if feed != nil {
storedFeed := s.db.CreateFeed(
feed.Title,
feed.Description,
feed.Link,
feed.FeedLink,
"",
feed.SiteURL,
feedUrl,
form.FolderID,
)
s.db.CreateItems(worker.ConvertItems(feed.Items, *storedFeed))

View File

@ -4,15 +4,16 @@ import (
"bytes"
"errors"
"fmt"
"github.com/mmcdole/gofeed"
"github.com/nkanaev/yarr/src/crawler"
"github.com/nkanaev/yarr/src/storage"
"io/ioutil"
"log"
"net"
"net/http"
"net/url"
"strings"
"time"
"github.com/nkanaev/yarr/src/crawler"
feedparser "github.com/nkanaev/yarr/src/feed"
"github.com/nkanaev/yarr/src/storage"
)
type FeedSource struct {
@ -55,32 +56,34 @@ func searchFeedLinks(html []byte, siteurl string) ([]FeedSource, error) {
return sources, nil
}
func DiscoverFeed(candidateUrl string) (*gofeed.Feed, *[]FeedSource, error) {
func DiscoverFeed(candidateUrl string) (*feedparser.Feed, string, *[]FeedSource, error) {
// Query URL
res, err := defaultClient.get(candidateUrl)
if err != nil {
return nil, nil, err
return nil, "", nil, err
}
defer res.Body.Close()
if res.StatusCode != 200 {
errmsg := fmt.Sprintf("Failed to fetch feed %s (status: %d)", candidateUrl, res.StatusCode)
return nil, nil, errors.New(errmsg)
return nil, "", nil, errors.New(errmsg)
}
content, err := ioutil.ReadAll(res.Body)
if err != nil {
return nil, nil, err
return nil, "", nil, err
}
// Try to feed into parser
feedparser := gofeed.NewParser()
feed, err := feedparser.Parse(bytes.NewReader(content))
if err == nil {
/*
// WILD: feeds may not always have link to themselves
if len(feed.FeedLink) == 0 {
feed.FeedLink = candidateUrl
}
*/
// WILD: resolve relative links (path, without host)
/*
base, _ := url.Parse(candidateUrl)
if link, err := url.Parse(feed.Link); err == nil && link.Host == "" {
feed.Link = base.ResolveReference(link).String()
@ -88,23 +91,28 @@ func DiscoverFeed(candidateUrl string) (*gofeed.Feed, *[]FeedSource, error) {
if link, err := url.Parse(feed.FeedLink); err == nil && link.Host == "" {
feed.FeedLink = base.ResolveReference(link).String()
}
*/
err := feed.TranslateURLs(candidateUrl)
if err != nil {
log.Printf("Failed to translate feed urls: %s", err)
}
return feed, nil, nil
return feed, candidateUrl, nil, nil
}
// Possibly an HTML page. Search it for feed links
sources, err := searchFeedLinks(content, candidateUrl)
if err != nil {
return nil, nil, err
return nil, "", nil, err
} else if len(sources) == 0 {
return nil, nil, errors.New("No feeds found at the given url")
return nil, "", nil, errors.New("No feeds found at the given url")
} else if len(sources) == 1 {
if sources[0].Url == candidateUrl {
return nil, nil, errors.New("Recursion!")
return nil, "", nil, errors.New("Recursion!")
}
return DiscoverFeed(sources[0].Url)
}
return nil, &sources, nil
return nil, "", &sources, nil
}
func FindFavicon(websiteUrl, feedUrl string) (*[]byte, error) {
@ -163,17 +171,12 @@ func FindFavicon(websiteUrl, feedUrl string) (*[]byte, error) {
return nil, nil
}
func ConvertItems(items []*gofeed.Item, feed storage.Feed) []storage.Item {
func ConvertItems(items []feedparser.Item, feed storage.Feed) []storage.Item {
result := make([]storage.Item, len(items))
for i, item := range items {
imageURL := ""
if item.Image != nil {
imageURL = item.Image.URL
}
author := ""
if item.Author != nil {
author = item.Author.Name
}
podcastUrl := item.PodcastURL
/*
var podcastUrl *string
if item.Enclosures != nil {
for _, enclosure := range item.Enclosures {
@ -182,19 +185,19 @@ func ConvertItems(items []*gofeed.Item, feed storage.Feed) []storage.Item {
}
}
}
*/
result[i] = storage.Item{
GUID: item.GUID,
FeedId: feed.Id,
Title: item.Title,
Link: item.Link,
Description: item.Description,
Link: item.URL,
Description: "",
Content: item.Content,
Author: author,
Date: item.PublishedParsed,
DateUpdated: item.UpdatedParsed,
Author: "",
Date: &item.Date,
Status: storage.UNREAD,
Image: imageURL,
PodcastURL: podcastUrl,
Image: item.ImageURL,
PodcastURL: &podcastUrl,
}
}
return result
@ -231,7 +234,6 @@ func listItems(f storage.Feed, db *storage.Storage) ([]storage.Item, error) {
db.SetHTTPState(f.Id, lastModified, etag)
}
feedparser := gofeed.NewParser()
feed, err := feedparser.Parse(res.Body)
if err != nil {
return nil, err