mirror of
https://github.com/nkanaev/yarr.git
synced 2025-05-24 00:33:14 +00:00
switch to internal feed parser
This commit is contained in:
parent
c91b439878
commit
5b36530f67
@ -155,7 +155,7 @@ func (s *Server) handleFeedList(c *router.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
feed, sources, err := worker.DiscoverFeed(form.Url)
|
feed, feedUrl, sources, err := worker.DiscoverFeed(form.Url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Print(err)
|
log.Print(err)
|
||||||
c.JSON(http.StatusOK, map[string]string{"status": "notfound"})
|
c.JSON(http.StatusOK, map[string]string{"status": "notfound"})
|
||||||
@ -165,9 +165,9 @@ func (s *Server) handleFeedList(c *router.Context) {
|
|||||||
if feed != nil {
|
if feed != nil {
|
||||||
storedFeed := s.db.CreateFeed(
|
storedFeed := s.db.CreateFeed(
|
||||||
feed.Title,
|
feed.Title,
|
||||||
feed.Description,
|
"",
|
||||||
feed.Link,
|
feed.SiteURL,
|
||||||
feed.FeedLink,
|
feedUrl,
|
||||||
form.FolderID,
|
form.FolderID,
|
||||||
)
|
)
|
||||||
s.db.CreateItems(worker.ConvertItems(feed.Items, *storedFeed))
|
s.db.CreateItems(worker.ConvertItems(feed.Items, *storedFeed))
|
||||||
|
@ -4,15 +4,16 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/mmcdole/gofeed"
|
|
||||||
"github.com/nkanaev/yarr/src/crawler"
|
|
||||||
"github.com/nkanaev/yarr/src/storage"
|
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
|
"log"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strings"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/nkanaev/yarr/src/crawler"
|
||||||
|
feedparser "github.com/nkanaev/yarr/src/feed"
|
||||||
|
"github.com/nkanaev/yarr/src/storage"
|
||||||
)
|
)
|
||||||
|
|
||||||
type FeedSource struct {
|
type FeedSource struct {
|
||||||
@ -55,32 +56,34 @@ func searchFeedLinks(html []byte, siteurl string) ([]FeedSource, error) {
|
|||||||
return sources, nil
|
return sources, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func DiscoverFeed(candidateUrl string) (*gofeed.Feed, *[]FeedSource, error) {
|
func DiscoverFeed(candidateUrl string) (*feedparser.Feed, string, *[]FeedSource, error) {
|
||||||
// Query URL
|
// Query URL
|
||||||
res, err := defaultClient.get(candidateUrl)
|
res, err := defaultClient.get(candidateUrl)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return nil, "", nil, err
|
||||||
}
|
}
|
||||||
defer res.Body.Close()
|
defer res.Body.Close()
|
||||||
if res.StatusCode != 200 {
|
if res.StatusCode != 200 {
|
||||||
errmsg := fmt.Sprintf("Failed to fetch feed %s (status: %d)", candidateUrl, res.StatusCode)
|
errmsg := fmt.Sprintf("Failed to fetch feed %s (status: %d)", candidateUrl, res.StatusCode)
|
||||||
return nil, nil, errors.New(errmsg)
|
return nil, "", nil, errors.New(errmsg)
|
||||||
}
|
}
|
||||||
content, err := ioutil.ReadAll(res.Body)
|
content, err := ioutil.ReadAll(res.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return nil, "", nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try to feed into parser
|
// Try to feed into parser
|
||||||
feedparser := gofeed.NewParser()
|
|
||||||
feed, err := feedparser.Parse(bytes.NewReader(content))
|
feed, err := feedparser.Parse(bytes.NewReader(content))
|
||||||
if err == nil {
|
if err == nil {
|
||||||
|
/*
|
||||||
// WILD: feeds may not always have link to themselves
|
// WILD: feeds may not always have link to themselves
|
||||||
if len(feed.FeedLink) == 0 {
|
if len(feed.FeedLink) == 0 {
|
||||||
feed.FeedLink = candidateUrl
|
feed.FeedLink = candidateUrl
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
// WILD: resolve relative links (path, without host)
|
// WILD: resolve relative links (path, without host)
|
||||||
|
/*
|
||||||
base, _ := url.Parse(candidateUrl)
|
base, _ := url.Parse(candidateUrl)
|
||||||
if link, err := url.Parse(feed.Link); err == nil && link.Host == "" {
|
if link, err := url.Parse(feed.Link); err == nil && link.Host == "" {
|
||||||
feed.Link = base.ResolveReference(link).String()
|
feed.Link = base.ResolveReference(link).String()
|
||||||
@ -88,23 +91,28 @@ func DiscoverFeed(candidateUrl string) (*gofeed.Feed, *[]FeedSource, error) {
|
|||||||
if link, err := url.Parse(feed.FeedLink); err == nil && link.Host == "" {
|
if link, err := url.Parse(feed.FeedLink); err == nil && link.Host == "" {
|
||||||
feed.FeedLink = base.ResolveReference(link).String()
|
feed.FeedLink = base.ResolveReference(link).String()
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
err := feed.TranslateURLs(candidateUrl)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("Failed to translate feed urls: %s", err)
|
||||||
|
}
|
||||||
|
|
||||||
return feed, nil, nil
|
return feed, candidateUrl, nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Possibly an html link. Search for feed links
|
// Possibly an html link. Search for feed links
|
||||||
sources, err := searchFeedLinks(content, candidateUrl)
|
sources, err := searchFeedLinks(content, candidateUrl)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return nil, "", nil, err
|
||||||
} else if len(sources) == 0 {
|
} else if len(sources) == 0 {
|
||||||
return nil, nil, errors.New("No feeds found at the given url")
|
return nil, "", nil, errors.New("No feeds found at the given url")
|
||||||
} else if len(sources) == 1 {
|
} else if len(sources) == 1 {
|
||||||
if sources[0].Url == candidateUrl {
|
if sources[0].Url == candidateUrl {
|
||||||
return nil, nil, errors.New("Recursion!")
|
return nil, "", nil, errors.New("Recursion!")
|
||||||
}
|
}
|
||||||
return DiscoverFeed(sources[0].Url)
|
return DiscoverFeed(sources[0].Url)
|
||||||
}
|
}
|
||||||
return nil, &sources, nil
|
return nil, "", &sources, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func FindFavicon(websiteUrl, feedUrl string) (*[]byte, error) {
|
func FindFavicon(websiteUrl, feedUrl string) (*[]byte, error) {
|
||||||
@ -163,17 +171,12 @@ func FindFavicon(websiteUrl, feedUrl string) (*[]byte, error) {
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func ConvertItems(items []*gofeed.Item, feed storage.Feed) []storage.Item {
|
func ConvertItems(items []feedparser.Item, feed storage.Feed) []storage.Item {
|
||||||
result := make([]storage.Item, len(items))
|
result := make([]storage.Item, len(items))
|
||||||
for i, item := range items {
|
for i, item := range items {
|
||||||
imageURL := ""
|
podcastUrl := item.PodcastURL
|
||||||
if item.Image != nil {
|
|
||||||
imageURL = item.Image.URL
|
/*
|
||||||
}
|
|
||||||
author := ""
|
|
||||||
if item.Author != nil {
|
|
||||||
author = item.Author.Name
|
|
||||||
}
|
|
||||||
var podcastUrl *string
|
var podcastUrl *string
|
||||||
if item.Enclosures != nil {
|
if item.Enclosures != nil {
|
||||||
for _, enclosure := range item.Enclosures {
|
for _, enclosure := range item.Enclosures {
|
||||||
@ -182,19 +185,19 @@ func ConvertItems(items []*gofeed.Item, feed storage.Feed) []storage.Item {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
result[i] = storage.Item{
|
result[i] = storage.Item{
|
||||||
GUID: item.GUID,
|
GUID: item.GUID,
|
||||||
FeedId: feed.Id,
|
FeedId: feed.Id,
|
||||||
Title: item.Title,
|
Title: item.Title,
|
||||||
Link: item.Link,
|
Link: item.URL,
|
||||||
Description: item.Description,
|
Description: "",
|
||||||
Content: item.Content,
|
Content: item.Content,
|
||||||
Author: author,
|
Author: "",
|
||||||
Date: item.PublishedParsed,
|
Date: &item.Date,
|
||||||
DateUpdated: item.UpdatedParsed,
|
|
||||||
Status: storage.UNREAD,
|
Status: storage.UNREAD,
|
||||||
Image: imageURL,
|
Image: item.ImageURL,
|
||||||
PodcastURL: podcastUrl,
|
PodcastURL: &podcastUrl,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
@ -231,7 +234,6 @@ func listItems(f storage.Feed, db *storage.Storage) ([]storage.Item, error) {
|
|||||||
db.SetHTTPState(f.Id, lastModified, etag)
|
db.SetHTTPState(f.Id, lastModified, etag)
|
||||||
}
|
}
|
||||||
|
|
||||||
feedparser := gofeed.NewParser()
|
|
||||||
feed, err := feedparser.Parse(res.Body)
|
feed, err := feedparser.Parse(res.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
Loading…
x
Reference in New Issue
Block a user