refactor feed search

This commit is contained in:
Nazar Kanaev 2020-09-02 12:24:10 +01:00
parent d9cd520396
commit 14ec19a3a6
2 changed files with 69 additions and 69 deletions

View File

@ -1,6 +1,8 @@
package server package server
import ( import (
"bytes"
"errors"
"fmt" "fmt"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"github.com/mmcdole/gofeed" "github.com/mmcdole/gofeed"
@ -29,12 +31,18 @@ const feedLinks = `
a:contains("FEED") a:contains("FEED")
` `
func FindFeeds(r *http.Response) ([]FeedSource, error) { func searchFeedLinks(html []byte, siteurl string) ([]FeedSource, error) {
sources := make([]FeedSource, 0, 0) sources := make([]FeedSource, 0, 0)
doc, err := goquery.NewDocumentFromResponse(r)
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(html))
if err != nil { if err != nil {
return sources, err return sources, err
} }
base, err := url.Parse(siteurl)
if err != nil {
return sources, err
}
doc.Find(feedLinks).Each(func(i int, s *goquery.Selection) { doc.Find(feedLinks).Each(func(i int, s *goquery.Selection) {
if href, ok := s.Attr("href"); ok { if href, ok := s.Attr("href"); ok {
feedUrl, err := url.Parse(href) feedUrl, err := url.Parse(href)
@ -42,13 +50,56 @@ func FindFeeds(r *http.Response) ([]FeedSource, error) {
return return
} }
title := s.AttrOr("title", "") title := s.AttrOr("title", "")
url := doc.Url.ResolveReference(feedUrl).String() url := base.ResolveReference(feedUrl).String()
sources = append(sources, FeedSource{Title: title, Url: url}) sources = append(sources, FeedSource{Title: title, Url: url})
} }
}) })
return sources, nil return sources, nil
} }
// discoverFeed fetches url and tries to interpret the response as a feed.
//
// Exactly one of the three results is meaningful:
//   - a parsed feed, when the response body parses as a feed (FeedLink is
//     filled in with url when the feed does not declare one);
//   - a list of candidate feed sources, when the body contains more than one
//     feed link and the caller has to choose;
//   - an error, when the request fails, the status is not 200, no feed links
//     are found, or following single feed links leads back to a URL that was
//     already fetched during this discovery.
//
// When the body contains exactly one feed link, that link is followed and
// discovery continues from there.
func discoverFeed(url, userAgent string) (*gofeed.Feed, *[]FeedSource, error) {
	return discoverFeedFrom(url, userAgent, map[string]bool{})
}

// discoverFeedFrom implements discoverFeed. visited records every URL fetched
// so far in this discovery so that link cycles of any length (a page linking
// to itself, or A -> B -> A) end with an error instead of recursing forever.
func discoverFeedFrom(url, userAgent string, visited map[string]bool) (*gofeed.Feed, *[]FeedSource, error) {
	visited[url] = true

	// Query URL
	content, err := fetchDiscoveryContent(url, userAgent)
	if err != nil {
		return nil, nil, err
	}

	// Try to feed into parser
	feedparser := gofeed.NewParser()
	feed, err := feedparser.Parse(bytes.NewReader(content))
	if err == nil {
		// WILD: some feeds do not have link to itself
		if len(feed.FeedLink) == 0 {
			feed.FeedLink = url
		}
		return feed, nil, nil
	}

	// Possibly an html link. Search for feed links
	sources, err := searchFeedLinks(content, url)
	switch {
	case err != nil:
		return nil, nil, err
	case len(sources) == 0:
		return nil, nil, errors.New("No feeds found at the given url")
	case len(sources) == 1:
		next := sources[0].Url
		// Covers the direct self-link (url itself is already marked) as well
		// as longer cycles through previously fetched pages.
		if visited[next] {
			return nil, nil, errors.New("Recursion!")
		}
		return discoverFeedFrom(next, userAgent, visited)
	}
	return nil, &sources, nil
}

// fetchDiscoveryContent performs a GET request for url with the given
// user-agent header and returns the whole response body. A non-200 status is
// reported as an error. The response body is always closed before returning.
func fetchDiscoveryContent(url, userAgent string) ([]byte, error) {
	// http.NewRequest fails on malformed URLs; using the nil request it
	// returns in that case would panic on the Header access below.
	feedreq, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	feedreq.Header.Set("user-agent", userAgent)

	// NOTE(review): the client is created without a timeout, so a stalled
	// server can block this call indefinitely — consider setting one.
	feedclient := &http.Client{}
	res, err := feedclient.Do(feedreq)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	if res.StatusCode != 200 {
		errmsg := fmt.Sprintf("Failed to fetch feed %s (status: %d)", url, res.StatusCode)
		return nil, errors.New(errmsg)
	}
	return ioutil.ReadAll(res.Body)
}
func findFavicon(websiteUrl, feedUrl string) (*[]byte, error) { func findFavicon(websiteUrl, feedUrl string) (*[]byte, error) {
candidateUrls := make([]string, 0) candidateUrls := make([]string, 0)
@ -142,24 +193,3 @@ func listItems(f storage.Feed) ([]storage.Item, error) {
} }
return convertItems(feed.Items, f), nil return convertItems(feed.Items, f), nil
} }
// createFeed parses the feed located at url, stores it through s under the
// given folder, and then stores the feed's items converted with convertItems.
// It returns the parser's error when the feed cannot be fetched or parsed,
// and nil otherwise.
func createFeed(s *storage.Storage, url string, folderId *int64) error {
	parsed, err := gofeed.NewParser().ParseURL(url)
	if err != nil {
		return err
	}

	// Fall back to the requested url when the parsed feed has an empty FeedLink.
	link := parsed.FeedLink
	if link == "" {
		link = url
	}

	stored := s.CreateFeed(parsed.Title, parsed.Description, parsed.Link, link, folderId)
	items := convertItems(parsed.Items, *stored)
	s.CreateItems(items)
	return nil
}

View File

@ -220,64 +220,34 @@ func FeedListHandler(rw http.ResponseWriter, req *http.Request) {
list := db(req).ListFeeds() list := db(req).ListFeeds()
writeJSON(rw, list) writeJSON(rw, list)
} else if req.Method == "POST" { } else if req.Method == "POST" {
var feed FeedCreateForm var form FeedCreateForm
if err := json.NewDecoder(req.Body).Decode(&feed); err != nil { if err := json.NewDecoder(req.Body).Decode(&form); err != nil {
handler(req).log.Print(err) handler(req).log.Print(err)
rw.WriteHeader(http.StatusBadRequest) rw.WriteHeader(http.StatusBadRequest)
return return
} }
feedUrl := feed.Url feed, sources, err := discoverFeed(form.Url, req.Header.Get("user-agent"))
feedreq, _ := http.NewRequest("GET", feedUrl, nil)
feedreq.Header.Set("user-agent", req.Header.Get("user-agent"))
feedclient := &http.Client{}
res, err := feedclient.Do(feedreq)
if err != nil { if err != nil {
handler(req).log.Print(err) handler(req).log.Print(err)
writeJSON(rw, map[string]string{"status": "notfound"}) writeJSON(rw, map[string]string{"status": "notfound"})
return return
} else if res.StatusCode != 200 {
handler(req).log.Printf("Failed to fetch %s (status: %d)", feedUrl, res.StatusCode)
body, err := ioutil.ReadAll(res.Body)
handler(req).log.Print(string(body), err)
writeJSON(rw, map[string]string{"status": "notfound"})
return
} }
contentType := res.Header.Get("Content-Type") if feed != nil {
if strings.HasPrefix(contentType, "text/html") || contentType == "" { storedFeed := db(req).CreateFeed(
sources, err := FindFeeds(res) feed.Title,
if err != nil { feed.Description,
handler(req).log.Print(err) feed.Link,
writeJSON(rw, map[string]string{"status": "notfound"}) feed.FeedLink,
return form.FolderID,
} )
if len(sources) == 0 { db(req).CreateItems(convertItems(feed.Items, *storedFeed))
writeJSON(rw, map[string]string{"status": "notfound"})
} else if len(sources) > 1 {
writeJSON(rw, map[string]interface{}{
"status": "multiple",
"choice": sources,
})
} else if len(sources) == 1 {
feedUrl = sources[0].Url
err = createFeed(db(req), feedUrl, feed.FolderID)
if err != nil {
handler(req).log.Print(err)
rw.WriteHeader(http.StatusBadRequest)
return
}
writeJSON(rw, map[string]string{"status": "success"}) writeJSON(rw, map[string]string{"status": "success"})
} } else if sources != nil {
} else if strings.Contains(contentType, "xml") || strings.Contains(contentType, "json") { writeJSON(rw, map[string]interface{}{"status": "multiple", "choice": sources})
// text/xml, application/xml, application/rss+xml, application/atom+xml
err = createFeed(db(req), feedUrl, feed.FolderID)
if err == nil {
writeJSON(rw, map[string]string{"status": "success"})
}
} else { } else {
writeJSON(rw, map[string]string{"status": "notfound"}) writeJSON(rw, map[string]string{"status": "notfound"})
return
} }
} }
} }