feed finder

This commit is contained in:
Nazar Kanaev 2020-06-29 13:31:36 +01:00
parent a91a13fa1f
commit 6349e97fdf
3 changed files with 79 additions and 18 deletions

View File

@ -1,12 +1,15 @@
package server package server
import ( import (
"github.com/nkanaev/yarr/worker"
"net/http" "net/http"
"encoding/json"
"os" "os"
"log" "log"
"io" "io"
"fmt" "fmt"
"mime" "mime"
"strings"
) )
func IndexHandler(rw http.ResponseWriter, req *http.Request) { func IndexHandler(rw http.ResponseWriter, req *http.Request) {
@ -40,7 +43,54 @@ func FolderListHandler(rw http.ResponseWriter, req *http.Request) {
func FolderHandler(rw http.ResponseWriter, req *http.Request) { func FolderHandler(rw http.ResponseWriter, req *http.Request) {
} }
// NewFeed is the JSON payload that FeedListHandler decodes from the body of a
// POST request.
type NewFeed struct {
// Url is the address FeedListHandler fetches; it may point at an HTML page
// (which is then scanned for feed links) or at a feed itself.
Url string `json:"url"`
// FolderID is read from the "folder_id" key and left at zero when absent.
// NOTE(review): presumably the folder the new feed should be filed under;
// nothing in the visible handler reads it yet — confirm.
FolderID int64 `json:"folder_id,omitempty"`
}
func FeedListHandler(rw http.ResponseWriter, req *http.Request) { func FeedListHandler(rw http.ResponseWriter, req *http.Request) {
if req.Method == "POST" {
var feed NewFeed
if err := json.NewDecoder(req.Body).Decode(&feed); err != nil {
log.Print(err)
rw.WriteHeader(http.StatusBadRequest)
return
}
feedUrl := feed.Url
res, err := http.Get(feedUrl)
if err != nil {
log.Print(err)
rw.WriteHeader(http.StatusBadRequest)
return
} else if res.StatusCode != 200 {
rw.WriteHeader(http.StatusBadRequest)
return
}
contentType := res.Header.Get("Content-Type")
if strings.HasPrefix(contentType, "text/html") {
sources, err := worker.FindFeeds(res)
if err != nil {
log.Print(err)
}
if len(sources) == 0 {
writeJSON(rw, map[string]string{"status": "notfound"})
} else if len(sources) > 1 {
writeJSON(rw, map[string]interface{}{
"status": "multiple",
"choice": sources,
})
} else if len(sources) == 1 {
feedUrl = sources[0].Url
fmt.Println("feedUrl:", feedUrl)
writeJSON(rw, map[string]string{"status": "success"})
}
fmt.Println("got html url", sources, feedUrl)
} else if strings.HasPrefix(contentType, "text/xml") {
log.Print("got rss feed")
}
log.Print(res.Header.Get("Content-Type"))
}
} }
func FeedHandler(rw http.ResponseWriter, req *http.Request) { func FeedHandler(rw http.ResponseWriter, req *http.Request) {

View File

@ -1,9 +1,11 @@
package server package server
import ( import (
"encoding/json"
"context" "context"
"regexp" "regexp"
"net/http" "net/http"
"log"
) )
type Route struct { type Route struct {
@ -38,6 +40,7 @@ var routes []Route = []Route{
p("/api/folders/:id", FolderHandler), p("/api/folders/:id", FolderHandler),
p("/api/feeds", FeedListHandler), p("/api/feeds", FeedListHandler),
p("/api/feeds/:id", FeedHandler), p("/api/feeds/:id", FeedHandler),
p("/api/feeds/find", FeedHandler),
} }
func Vars(req *http.Request) map[string]string { func Vars(req *http.Request) map[string]string {
@ -66,6 +69,16 @@ func (h Handler) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
rw.WriteHeader(http.StatusNotFound) rw.WriteHeader(http.StatusNotFound)
} }
// writeJSON marshals data to JSON and writes it to rw followed by a newline,
// with the Content-Type header set to "application/json; charset=utf-8".
//
// If data cannot be marshalled, the error is logged and an empty
// 500 Internal Server Error response is sent instead; a bad payload must not
// terminate the whole server process. A failed write is logged as well.
func writeJSON(rw http.ResponseWriter, data interface{}) {
	// Marshal before touching the response so that a failure can still be
	// reported with a proper status code and without a JSON content type.
	reply, err := json.Marshal(data)
	if err != nil {
		log.Print(err)
		rw.WriteHeader(http.StatusInternalServerError)
		return
	}
	rw.Header().Set("Content-Type", "application/json; charset=utf-8")
	if _, err := rw.Write(append(reply, '\n')); err != nil {
		log.Print(err)
	}
}
func New() *http.Server { func New() *http.Server {
h := Handler{} h := Handler{}
s := &http.Server{Addr: "127.0.0.1:8000", Handler: h} s := &http.Server{Addr: "127.0.0.1:8000", Handler: h}

View File

@ -1,37 +1,35 @@
package worker package worker
import ( import (
"log"
//"net/http"
"net/url" "net/url"
"net/http"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
) )
// FeedSource describes a single feed link discovered by FindFeeds. It is
// serialized to JSON with the keys "title" and "url".
type FeedSource struct {
	// Title is the value of the link element's "title" attribute, or the
	// empty string when the attribute is missing.
	Title string `json:"title"`
	// Url is the link's "href" resolved against the document URL and
	// rendered as a string.
	Url string `json:"url"`
}
const feedLinks = `link[type='application/rss+xml'],link[type='application/atom+xml']`
func FindFeeds(u string) []FeedSource {
doc, err := goquery.NewDocument(u)
if err != nil {
log.Fatal(err)
}
log.Print(doc.Url) func FindFeeds(r *http.Response) ([]FeedSource, error) {
// Find the review items
sources := make([]FeedSource, 0, 0) sources := make([]FeedSource, 0, 0)
doc.Find("link[type='application/rss+xml'],link[type='application/atom+xml']").Each(func(i int, s *goquery.Selection) { doc, err := goquery.NewDocumentFromResponse(r)
if err != nil {
return sources, err
}
doc.Find(feedLinks).Each(func(i int, s *goquery.Selection) {
if href, ok := s.Attr("href"); ok { if href, ok := s.Attr("href"); ok {
feedUrl, feedErr := url.Parse(href) feedUrl, err := url.Parse(href)
if feedErr != nil { if err != nil {
log.Fatal(err) return
} }
title := s.AttrOr("title", "") title := s.AttrOr("title", "")
feedUrl = doc.Url.ResolveReference(feedUrl) url := doc.Url.ResolveReference(feedUrl).String()
sources = append(sources, FeedSource{Title: title, Url: feedUrl}) sources = append(sources, FeedSource{Title: title, Url: url})
} }
}) })
return sources return sources, nil
} }