diff --git a/src/server/routes.go b/src/server/routes.go
index 97d14ea..ec32b25 100644
--- a/src/server/routes.go
+++ b/src/server/routes.go
@@ -457,7 +457,7 @@ func (s *Server) handlePageCrawl(c *router.Context) {
 		return
 	}
 
-	res, err := http.Get(url)
+	res, err := worker.GetHTTP(url)
 	if err != nil {
 		log.Print(err)
 		c.Out.WriteHeader(http.StatusBadRequest)
diff --git a/src/worker/client.go b/src/worker/client.go
index d7cac5d..49ab3b3 100644
--- a/src/worker/client.go
+++ b/src/worker/client.go
@@ -50,3 +50,21 @@ func init() {
 		userAgent: "Yarr/1.0",
 	}
 }
+
+// GetHTTP fetches url through the package-level client and returns the
+// response with its Body replaced by the reader httpBody selects for the
+// response's Content-Type. The caller is responsible for closing the Body.
+func GetHTTP(url string) (*http.Response, error) {
+	res, err := client.get(url)
+	if err != nil {
+		return nil, err
+	}
+	body, err := httpBody(res)
+	if err != nil {
+		// The response is discarded on this path, so release its body here.
+		res.Body.Close()
+		return nil, err
+	}
+	res.Body = body
+	return res, nil
+}
diff --git a/src/worker/crawler.go b/src/worker/crawler.go
index e67e7c0..87d642a 100644
--- a/src/worker/crawler.go
+++ b/src/worker/crawler.go
@@ -202,10 +202,22 @@ func listItems(f storage.Feed, db *storage.Storage) ([]storage.Item, error) {
 	return ConvertItems(feed.Items, f), nil
 }
 
-func httpBody(res *http.Response) (io.Reader, error) {
+// httpBody returns the response body, routed through charset.NewReader when
+// the Content-Type header contains "charset". Closing the returned value
+// always closes res.Body.
+func httpBody(res *http.Response) (io.ReadCloser, error) {
 	ctype := res.Header.Get("Content-Type")
 	if strings.Contains(ctype, "charset") {
-		return charset.NewReader(res.Body, ctype)
+		reader, err := charset.NewReader(res.Body, ctype)
+		if err != nil {
+			return nil, err
+		}
+		// Read through the decoder but keep res.Body's Close, so closing the
+		// result still releases the underlying body instead of being a no-op.
+		return struct {
+			io.Reader
+			io.Closer
+		}{reader, res.Body}, nil
 	}
 	return res.Body, nil
 }