From 6349e97fdfe9a1912692245ba7163fdcceb3d166 Mon Sep 17 00:00:00 2001 From: Nazar Kanaev Date: Mon, 29 Jun 2020 13:31:36 +0100 Subject: [PATCH] feed finder --- server/handlers.go | 50 ++++++++++++++++++++++++++++++++++++++++++++++ server/server.go | 13 ++++++++++++ worker/finder.go | 34 +++++++++++++++---------------- 3 files changed, 79 insertions(+), 18 deletions(-) diff --git a/server/handlers.go b/server/handlers.go index ea47987..b2a0165 100644 --- a/server/handlers.go +++ b/server/handlers.go @@ -1,12 +1,15 @@ package server import ( + "github.com/nkanaev/yarr/worker" "net/http" + "encoding/json" "os" "log" "io" "fmt" "mime" + "strings" ) func IndexHandler(rw http.ResponseWriter, req *http.Request) { @@ -40,7 +43,54 @@ func FolderListHandler(rw http.ResponseWriter, req *http.Request) { func FolderHandler(rw http.ResponseWriter, req *http.Request) { } +type NewFeed struct { + Url string `json:"url"` + FolderID int64 `json:"folder_id,omitempty"` +} + func FeedListHandler(rw http.ResponseWriter, req *http.Request) { + if req.Method == "POST" { + var feed NewFeed + if err := json.NewDecoder(req.Body).Decode(&feed); err != nil { + log.Print(err) + rw.WriteHeader(http.StatusBadRequest) + return + } + feedUrl := feed.Url + res, err := http.Get(feedUrl) + if err != nil { + log.Print(err) + rw.WriteHeader(http.StatusBadRequest) + return + } else if res.StatusCode != 200 { + rw.WriteHeader(http.StatusBadRequest) + return + } + + contentType := res.Header.Get("Content-Type") + if strings.HasPrefix(contentType, "text/html") { + sources, err := worker.FindFeeds(res) + if err != nil { + log.Print(err) + } + if len(sources) == 0 { + writeJSON(rw, map[string]string{"status": "notfound"}) + } else if len(sources) > 1 { + writeJSON(rw, map[string]interface{}{ + "status": "multiple", + "choice": sources, + }) + } else if len(sources) == 1 { + feedUrl = sources[0].Url + fmt.Println("feedUrl:", feedUrl) + writeJSON(rw, map[string]string{"status": "success"}) + } + fmt.Println("got html url", sources, feedUrl) + } else if strings.HasPrefix(contentType, "text/xml") { + log.Print("got rss feed") + } + log.Print(res.Header.Get("Content-Type")) + } } func FeedHandler(rw http.ResponseWriter, req *http.Request) { diff --git a/server/server.go b/server/server.go index 6718a45..2baaaa6 100644 --- a/server/server.go +++ b/server/server.go @@ -1,9 +1,11 @@ package server import ( + "encoding/json" "context" "regexp" "net/http" + "log" ) type Route struct { @@ -38,6 +40,7 @@ var routes []Route = []Route{ p("/api/folders/:id", FolderHandler), p("/api/feeds", FeedListHandler), p("/api/feeds/:id", FeedHandler), + p("/api/feeds/find", FeedHandler), } func Vars(req *http.Request) map[string]string { @@ -66,6 +69,16 @@ func (h Handler) ServeHTTP(rw http.ResponseWriter, req *http.Request) { rw.WriteHeader(http.StatusNotFound) } +func writeJSON(rw http.ResponseWriter, data interface{}) { + rw.Header().Set("Content-Type", "application/json; charset=utf-8") + reply, err := json.Marshal(data) + if err != nil { + log.Fatal(err) + } + rw.Write(reply) + rw.Write([]byte("\n")) +} + func New() *http.Server { h := Handler{} s := &http.Server{Addr: "127.0.0.1:8000", Handler: h} diff --git a/worker/finder.go b/worker/finder.go index ec5c6af..3cecd70 100644 --- a/worker/finder.go +++ b/worker/finder.go @@ -1,37 +1,35 @@ package worker import ( - "log" - //"net/http" "net/url" + "net/http" "github.com/PuerkitoBio/goquery" ) type FeedSource struct { - Title string - Url *url.URL + Title string `json:"title"` + Url string `json:"url"` } +const feedLinks = `link[type='application/rss+xml'],link[type='application/atom+xml']` -func FindFeeds(u string) []FeedSource { - doc, err := goquery.NewDocument(u) - if err != nil { - log.Fatal(err) - } - log.Print(doc.Url) - // Find the review items +func FindFeeds(r *http.Response) ([]FeedSource, error) { sources := make([]FeedSource, 0, 0) - doc.Find("link[type='application/rss+xml'],link[type='application/atom+xml']").Each(func(i int, s *goquery.Selection) { + doc, err := goquery.NewDocumentFromResponse(r) + if err != nil { + return sources, err + } + doc.Find(feedLinks).Each(func(i int, s *goquery.Selection) { if href, ok := s.Attr("href"); ok { - feedUrl, feedErr := url.Parse(href) - if feedErr != nil { - log.Fatal(err) + feedUrl, err := url.Parse(href) + if err != nil { + return } title := s.AttrOr("title", "") - feedUrl = doc.Url.ResolveReference(feedUrl) - sources = append(sources, FeedSource{Title: title, Url: feedUrl}) + url := doc.Url.ResolveReference(feedUrl).String() + sources = append(sources, FeedSource{Title: title, Url: url}) } }) - return sources + return sources, nil }