From e62906e63dee06492e4ed5f659f251394ba2a1c6 Mon Sep 17 00:00:00 2001 From: nkanaev Date: Mon, 14 Nov 2022 15:11:05 +0000 Subject: [PATCH] fix readability edge case --- doc/changelog.txt | 1 + src/content/readability/readability.go | 4 ++++ src/server/routes.go | 5 +++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/doc/changelog.txt b/doc/changelog.txt index 1552378..800baec 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -6,6 +6,7 @@ - (fix) favicon visibility in dark mode (thanks to @caycaycarly for the report) - (fix) autoloading more articles not working in certain edge cases (thanks to @fenuks for the report) - (fix) handle Google URL redirects in "Read Here" (thanks to @cubbei for discovery) +- (fix) handle failures to extract content in "Read Here" (thanks to @grigio for the report) # v2.3 (2022-05-03) diff --git a/src/content/readability/readability.go b/src/content/readability/readability.go index 324d7f3..86e9086 100644 --- a/src/content/readability/readability.go +++ b/src/content/readability/readability.go @@ -6,6 +6,7 @@ package readability import ( "bytes" + "errors" "fmt" "io" "math" @@ -59,6 +60,9 @@ func ExtractContent(page io.Reader) (string, error) { best = body break } + if best == nil { + return "", errors.New("failed to extract content") + } } //log.Printf("[Readability] TopCandidate: %v", topCandidate) diff --git a/src/server/routes.go b/src/server/routes.go index 00991e2..d9fa61a 100644 --- a/src/server/routes.go +++ b/src/server/routes.go @@ -474,8 +474,9 @@ func (s *Server) handlePageCrawl(c *router.Context) { } content, err := readability.ExtractContent(strings.NewReader(body)) if err != nil { - log.Print(err) - c.Out.WriteHeader(http.StatusNoContent) + c.JSON(http.StatusOK, map[string]string{ + "content": "error: " + err.Error(), + }) return } content = sanitizer.Sanitize(url, content)