fix page crawling encoding

This commit is contained in:
nkanaev 2022-01-24 14:02:08 +00:00
parent b78c8bf8bf
commit e3e9542f1e
3 changed files with 20 additions and 3 deletions

View File

@ -457,7 +457,7 @@ func (s *Server) handlePageCrawl(c *router.Context) {
return return
} }
res, err := http.Get(url) res, err := worker.GetHTTP(url)
if err != nil { if err != nil {
log.Print(err) log.Print(err)
c.Out.WriteHeader(http.StatusBadRequest) c.Out.WriteHeader(http.StatusBadRequest)

View File

@ -50,3 +50,16 @@ func init() {
userAgent: "Yarr/1.0", userAgent: "Yarr/1.0",
} }
} }
// GetHTTP fetches url with the package-level client and returns the response
// with its Body replaced by the reader produced by httpBody, so callers read
// the body through whatever charset handling httpBody applies.
//
// On success the caller owns the response and must close res.Body.
// On failure the returned response is nil and nothing is left open.
func GetHTTP(url string) (*http.Response, error) {
	res, err := client.get(url)
	if err != nil {
		return nil, err
	}
	body, err := httpBody(res)
	if err != nil {
		// The caller only gets a nil response on this path and so cannot
		// close the body itself; release it here to avoid leaking the
		// connection. The Close error is secondary to the one returned.
		_ = res.Body.Close()
		return nil, err
	}
	res.Body = body
	return res, nil
}

View File

@ -202,10 +202,14 @@ func listItems(f storage.Feed, db *storage.Storage) ([]storage.Item, error) {
return ConvertItems(feed.Items, f), nil return ConvertItems(feed.Items, f), nil
} }
func httpBody(res *http.Response) (io.Reader, error) { func httpBody(res *http.Response) (io.ReadCloser, error) {
ctype := res.Header.Get("Content-Type") ctype := res.Header.Get("Content-Type")
if strings.Contains(ctype, "charset") { if strings.Contains(ctype, "charset") {
return charset.NewReader(res.Body, ctype) reader, err := charset.NewReader(res.Body, ctype)
if err != nil {
return nil, err
}
return io.NopCloser(reader), nil
} }
return res.Body, nil return res.Body, nil
} }