mirror of
https://github.com/nkanaev/yarr.git
synced 2025-05-24 00:33:14 +00:00
fix page crawling encoding
This commit is contained in:
parent
b78c8bf8bf
commit
e3e9542f1e
@ -457,7 +457,7 @@ func (s *Server) handlePageCrawl(c *router.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
res, err := http.Get(url)
|
res, err := worker.GetHTTP(url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Print(err)
|
log.Print(err)
|
||||||
c.Out.WriteHeader(http.StatusBadRequest)
|
c.Out.WriteHeader(http.StatusBadRequest)
|
||||||
|
@ -50,3 +50,16 @@ func init() {
|
|||||||
userAgent: "Yarr/1.0",
|
userAgent: "Yarr/1.0",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetHTTP(url string) (*http.Response, error) {
|
||||||
|
res, err := client.get(url)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
body, err := httpBody(res)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
res.Body = body
|
||||||
|
return res, nil
|
||||||
|
}
|
||||||
|
@ -202,10 +202,14 @@ func listItems(f storage.Feed, db *storage.Storage) ([]storage.Item, error) {
|
|||||||
return ConvertItems(feed.Items, f), nil
|
return ConvertItems(feed.Items, f), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func httpBody(res *http.Response) (io.Reader, error) {
|
func httpBody(res *http.Response) (io.ReadCloser, error) {
|
||||||
ctype := res.Header.Get("Content-Type")
|
ctype := res.Header.Get("Content-Type")
|
||||||
if strings.Contains(ctype, "charset") {
|
if strings.Contains(ctype, "charset") {
|
||||||
return charset.NewReader(res.Body, ctype)
|
reader, err := charset.NewReader(res.Body, ctype)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return io.NopCloser(reader), nil
|
||||||
}
|
}
|
||||||
return res.Body, nil
|
return res.Body, nil
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user