From 26b87dee98cec2e1bb46f7656edfb7483f476171 Mon Sep 17 00:00:00 2001
From: nkanaev
Date: Wed, 10 Nov 2021 10:54:12 +0000
Subject: [PATCH] remove html tags from titles

---
 doc/todo.txt           |  2 --
 src/parser/feed.go     |  4 +++-
 src/parser/rss_test.go | 23 +++++++++++++++++++++++
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/doc/todo.txt b/doc/todo.txt
index 05cdc7b..fb20fb8 100644
--- a/doc/todo.txt
+++ b/doc/todo.txt
@@ -1,5 +1,3 @@
-- strip out html tags from titles
-  https://www.aldaily.com/feed/
 - feedlist keyboard navigation is flaky in "unread" section
 - windows cli mode not working
 incorporate changes from:
diff --git a/src/parser/feed.go b/src/parser/feed.go
index bf77764..3846764 100644
--- a/src/parser/feed.go
+++ b/src/parser/feed.go
@@ -9,6 +9,8 @@ import (
 	"net/url"
 	"strings"
 	"time"
+
+	"github.com/nkanaev/yarr/src/content/htmlutil"
 )
 
 var UnknownFormat = errors.New("unknown feed format")
@@ -80,7 +82,7 @@ func (feed *Feed) cleanup() {
 	for i, item := range feed.Items {
 		feed.Items[i].GUID = strings.TrimSpace(item.GUID)
 		feed.Items[i].URL = strings.TrimSpace(item.URL)
-		feed.Items[i].Title = strings.TrimSpace(item.Title)
+		feed.Items[i].Title = strings.TrimSpace(htmlutil.ExtractText(item.Title))
 		feed.Items[i].Content = strings.TrimSpace(item.Content)
 
 		if item.ImageURL != "" && strings.Contains(item.Content, item.ImageURL) {
diff --git a/src/parser/rss_test.go b/src/parser/rss_test.go
index 65d0b41..e6eb8a1 100644
--- a/src/parser/rss_test.go
+++ b/src/parser/rss_test.go
@@ -180,3 +180,26 @@ func TestRSSPodcastDuplicated(t *testing.T) {
 		t.Fatal("item.audio_url must be unset if present in the content")
 	}
 }
+
+func TestRSSTitleHTMLTags(t *testing.T) {
+	feed, _ := Parse(strings.NewReader(`
+		<?xml version="1.0" encoding="UTF-8"?>
+		<rss version="2.0">
+			<channel>
+				<item>
+					<title>&lt;p&gt;title in p&lt;/p&gt;</title>
+				</item>
+				<item>
+					<title>very &lt;strong&gt;strong&lt;/strong&gt; title</title>
+				</item>
+			</channel>
+		</rss>
+	`))
+	have := []string{feed.Items[0].Title, feed.Items[1].Title}
+	want := []string{"title in p", "very strong title"}
+	for i := 0; i < len(want); i++ {
+		if want[i] != have[i] {
+			t.Errorf("title doesn't match\nwant: %#v\nhave: %#v\n", want[i], have[i])
+		}
+	}
+}