fix page crawling encoding

This commit is contained in:
nkanaev 2022-01-24 14:02:08 +00:00
parent b78c8bf8bf
commit e3e9542f1e
3 changed files with 20 additions and 3 deletions

View File

@ -457,7 +457,7 @@ func (s *Server) handlePageCrawl(c *router.Context) {
return
}
res, err := http.Get(url)
res, err := worker.GetHTTP(url)
if err != nil {
log.Print(err)
c.Out.WriteHeader(http.StatusBadRequest)

View File

@ -50,3 +50,16 @@ func init() {
userAgent: "Yarr/1.0",
}
}
// GetHTTP fetches url with the package-level client and replaces the
// response body with the reader returned by httpBody.
//
// On success the caller owns the returned response and must close its Body.
// On failure the response is nil and the error from client.get or httpBody
// is returned unchanged.
func GetHTTP(url string) (*http.Response, error) {
	res, err := client.get(url)
	if err != nil {
		return nil, err
	}
	body, err := httpBody(res)
	if err != nil {
		// The response is discarded on this path, so release its body here;
		// otherwise nobody is left holding a reference that could close it.
		// NOTE(review): assumes client.get follows the net/http contract of a
		// non-nil Body whenever err is nil — confirm.
		res.Body.Close()
		return nil, err
	}
	res.Body = body
	return res, nil
}

View File

@ -202,10 +202,14 @@ func listItems(f storage.Feed, db *storage.Storage) ([]storage.Item, error) {
return ConvertItems(feed.Items, f), nil
}
func httpBody(res *http.Response) (io.Reader, error) {
// httpBody returns the body of res as an io.ReadCloser.
//
// When the Content-Type header contains the substring "charset", the body is
// passed through charset.NewReader together with the header value, and the
// resulting reader is returned. Otherwise res.Body is returned unchanged.
// An error from charset.NewReader is returned as-is with a nil reader.
func httpBody(res *http.Response) (io.ReadCloser, error) {
ctype := res.Header.Get("Content-Type")
if strings.Contains(ctype, "charset") {
return charset.NewReader(res.Body, ctype)
reader, err := charset.NewReader(res.Body, ctype)
if err != nil {
return nil, err
}
// NOTE(review): io.NopCloser gives the wrapper a no-op Close, so closing the
// returned value does not close res.Body — confirm the underlying body is
// closed elsewhere.
return io.NopCloser(reader), nil
}
return res.Body, nil
}