refactor feed search

Nazar Kanaev 2020-09-02 12:24:10 +01:00
parent d9cd520396
commit 14ec19a3a6
2 changed files with 69 additions and 69 deletions

View File

@@ -1,6 +1,8 @@
 package server
 
 import (
+	"bytes"
+	"errors"
 	"fmt"
 	"github.com/PuerkitoBio/goquery"
 	"github.com/mmcdole/gofeed"
@@ -29,12 +31,18 @@ const feedLinks = `
 	a:contains("FEED")
 `
 
-func FindFeeds(r *http.Response) ([]FeedSource, error) {
+func searchFeedLinks(html []byte, siteurl string) ([]FeedSource, error) {
 	sources := make([]FeedSource, 0, 0)
-	doc, err := goquery.NewDocumentFromResponse(r)
+
+	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(html))
 	if err != nil {
 		return sources, err
 	}
+	base, err := url.Parse(siteurl)
+	if err != nil {
+		return sources, err
+	}
+
 	doc.Find(feedLinks).Each(func(i int, s *goquery.Selection) {
 		if href, ok := s.Attr("href"); ok {
 			feedUrl, err := url.Parse(href)
@@ -42,13 +50,56 @@ func FindFeeds(r *http.Response) ([]FeedSource, error) {
 				return
 			}
 			title := s.AttrOr("title", "")
-			url := doc.Url.ResolveReference(feedUrl).String()
+			url := base.ResolveReference(feedUrl).String()
 			sources = append(sources, FeedSource{Title: title, Url: url})
 		}
 	})
 	return sources, nil
 }
 
+func discoverFeed(url, userAgent string) (*gofeed.Feed, *[]FeedSource, error) {
+	// Query URL
+	feedreq, _ := http.NewRequest("GET", url, nil)
+	feedreq.Header.Set("user-agent", userAgent)
+	feedclient := &http.Client{}
+	res, err := feedclient.Do(feedreq)
+	if err != nil {
+		return nil, nil, err
+	} else if res.StatusCode != 200 {
+		errmsg := fmt.Sprintf("Failed to fetch feed %s (status: %d)", url, res.StatusCode)
+		return nil, nil, errors.New(errmsg)
+	}
+	content, err := ioutil.ReadAll(res.Body)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	// Try to feed into parser
+	feedparser := gofeed.NewParser()
+	feed, err := feedparser.Parse(bytes.NewReader(content))
+	if err == nil {
+		// WILD: some feeds do not have link to itself
+		if len(feed.FeedLink) == 0 {
+			feed.FeedLink = url
+		}
+		return feed, nil, nil
+	}
+
+	// Possibly an html link. Search for feed links
+	sources, err := searchFeedLinks(content, url)
+	if err != nil {
+		return nil, nil, err
+	} else if len(sources) == 0 {
+		return nil, nil, errors.New("No feeds found at the given url")
+	} else if len(sources) == 1 {
+		if sources[0].Url == url {
+			return nil, nil, errors.New("Recursion!")
+		}
+		return discoverFeed(sources[0].Url, userAgent)
+	}
+	return nil, &sources, nil
+}
+
 func findFavicon(websiteUrl, feedUrl string) (*[]byte, error) {
 	candidateUrls := make([]string, 0)
 
@@ -142,24 +193,3 @@ func listItems(f storage.Feed) ([]storage.Item, error) {
 	}
 	return convertItems(feed.Items, f), nil
 }
-
-func createFeed(s *storage.Storage, url string, folderId *int64) error {
-	fp := gofeed.NewParser()
-	feed, err := fp.ParseURL(url)
-	if err != nil {
-		return err
-	}
-	feedLink := feed.FeedLink
-	if len(feedLink) == 0 {
-		feedLink = url
-	}
-	storedFeed := s.CreateFeed(
-		feed.Title,
-		feed.Description,
-		feed.Link,
-		feedLink,
-		folderId,
-	)
-	s.CreateItems(convertItems(feed.Items, *storedFeed))
-	return nil
-}

View File

@@ -220,64 +220,34 @@ func FeedListHandler(rw http.ResponseWriter, req *http.Request) {
 		list := db(req).ListFeeds()
 		writeJSON(rw, list)
 	} else if req.Method == "POST" {
-		var feed FeedCreateForm
-		if err := json.NewDecoder(req.Body).Decode(&feed); err != nil {
+		var form FeedCreateForm
+		if err := json.NewDecoder(req.Body).Decode(&form); err != nil {
 			handler(req).log.Print(err)
 			rw.WriteHeader(http.StatusBadRequest)
 			return
 		}
 
-		feedUrl := feed.Url
-		feedreq, _ := http.NewRequest("GET", feedUrl, nil)
-		feedreq.Header.Set("user-agent", req.Header.Get("user-agent"))
-		feedclient := &http.Client{}
-		res, err := feedclient.Do(feedreq)
+		feed, sources, err := discoverFeed(form.Url, req.Header.Get("user-agent"))
 		if err != nil {
 			handler(req).log.Print(err)
 			writeJSON(rw, map[string]string{"status": "notfound"})
 			return
-		} else if res.StatusCode != 200 {
-			handler(req).log.Printf("Failed to fetch %s (status: %d)", feedUrl, res.StatusCode)
-			body, err := ioutil.ReadAll(res.Body)
-			handler(req).log.Print(string(body), err)
-			writeJSON(rw, map[string]string{"status": "notfound"})
-			return
 		}
-		contentType := res.Header.Get("Content-Type")
-		if strings.HasPrefix(contentType, "text/html") || contentType == "" {
-			sources, err := FindFeeds(res)
-			if err != nil {
-				handler(req).log.Print(err)
-				writeJSON(rw, map[string]string{"status": "notfound"})
-				return
-			}
-			if len(sources) == 0 {
-				writeJSON(rw, map[string]string{"status": "notfound"})
-			} else if len(sources) > 1 {
-				writeJSON(rw, map[string]interface{}{
-					"status": "multiple",
-					"choice": sources,
-				})
-			} else if len(sources) == 1 {
-				feedUrl = sources[0].Url
-				err = createFeed(db(req), feedUrl, feed.FolderID)
-				if err != nil {
-					handler(req).log.Print(err)
-					rw.WriteHeader(http.StatusBadRequest)
-					return
-				}
-				writeJSON(rw, map[string]string{"status": "success"})
-			}
-		} else if strings.Contains(contentType, "xml") || strings.Contains(contentType, "json") {
-			// text/xml, application/xml, application/rss+xml, application/atom+xml
-			err = createFeed(db(req), feedUrl, feed.FolderID)
-			if err == nil {
-				writeJSON(rw, map[string]string{"status": "success"})
-			}
-		}
+		if feed != nil {
+			storedFeed := db(req).CreateFeed(
+				feed.Title,
+				feed.Description,
+				feed.Link,
+				feed.FeedLink,
+				form.FolderID,
+			)
+			db(req).CreateItems(convertItems(feed.Items, *storedFeed))
+			writeJSON(rw, map[string]string{"status": "success"})
+		} else if sources != nil {
+			writeJSON(rw, map[string]interface{}{"status": "multiple", "choice": sources})
+		} else {
+			writeJSON(rw, map[string]string{"status": "notfound"})
+			return
+		}
 	}
 }
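
A minimal usage sketch of the new discoverFeed contract, mirroring FeedListHandler above. The wrapper function exampleDiscover, the URL, and the user-agent string are illustrative assumptions, not part of this commit; on success exactly one of feed or sources is non-nil, and both are nil when an error is returned.

package server

import "log"

// exampleDiscover sketches how discoverFeed's three-way result is consumed.
// The URL and user-agent below are placeholders for illustration only.
func exampleDiscover() {
	feed, sources, err := discoverFeed("https://example.com/blog", "yarr (example)")
	switch {
	case err != nil:
		log.Print(err) // fetch failed, non-200 status, or no feeds found
	case feed != nil:
		log.Printf("direct feed: %s", feed.FeedLink) // parsed feed, ready to store
	case sources != nil:
		log.Printf("%d candidate feeds, ask the user to choose", len(*sources))
	}
}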