diff --git a/src/parser/atom_test.go b/src/parser/atom_test.go
index 9b1c423..fa01297 100644
--- a/src/parser/atom_test.go
+++ b/src/parser/atom_test.go
@@ -93,3 +93,41 @@ func TestAtomHTMLTitle(t *testing.T) {
t.FailNow()
}
}
+
+func TestAtomImageLink(t *testing.T) { // checks that the first parsed item exposes the expected ImageURL
+ feed, _ := Parse(strings.NewReader(`
+
+
+
+
+
+
+ `)) // the second value returned by Parse is discarded
+ have := feed.Items[0].ImageURL // only the first item is inspected
+ want := `https://example.com/image.png?width=100&height=100`
+ if want != have {
+ t.Fatalf("item.image_url doesn't match\nwant: %#v\nhave: %#v\n", want, have)
+ }
+}
+
+// TestAtomImageLinkDuplicated covers a case found in https://www.reddit.com/r/funny.rss:
+// items come with thumbnail URLs which are also present in the content.
+func TestAtomImageLinkDuplicated(t *testing.T) {
+ feed, _ := Parse(strings.NewReader(`
+
+
+
+ <img src="https://example.com/image.png?width=100&height=100">
+
+
+
+ `)) // the second value returned by Parse is discarded
+ have := feed.Items[0].Content
+ want := `
`
+ if want != have {
+ t.Fatalf("want: %#v\nhave: %#v\n", want, have)
+ }
+ if feed.Items[0].ImageURL != "" { // the image must not be reported a second time via ImageURL
+ t.Fatal("item.image_url must be unset if present in the content")
+ }
+}
diff --git a/src/parser/feed.go b/src/parser/feed.go
index 504ed5d..925d8c1 100644
--- a/src/parser/feed.go
+++ b/src/parser/feed.go
@@ -75,6 +75,13 @@ func (feed *Feed) cleanup() {
feed.Items[i].URL = strings.TrimSpace(item.URL)
feed.Items[i].Title = strings.TrimSpace(item.Title)
feed.Items[i].Content = strings.TrimSpace(item.Content)
+
+ if item.ImageURL != "" && strings.Contains(item.Content, item.ImageURL) {
+ feed.Items[i].ImageURL = ""
+ }
+ if item.AudioURL != "" && strings.Contains(item.Content, item.AudioURL) {
+ feed.Items[i].AudioURL = ""
+ }
}
}
diff --git a/src/parser/rss.go b/src/parser/rss.go
index 114d3c0..01b6829 100644
--- a/src/parser/rss.go
+++ b/src/parser/rss.go
@@ -77,11 +77,9 @@ func ParseRSS(r io.Reader) (*Feed, error) {
if e.Type == "audio/mpeg" || e.Type == "audio/x-m4a" {
podcastURL = e.URL
- origBase := path.Base(srcitem.OrigEnclosureLink)
- if origBase != "" && strings.Contains(podcastURL, origBase) {
+ if srcitem.OrigEnclosureLink != "" && strings.Contains(podcastURL, path.Base(srcitem.OrigEnclosureLink)) {
podcastURL = srcitem.OrigEnclosureLink
}
-
break
}
}
diff --git a/src/parser/rss_test.go b/src/parser/rss_test.go
index 0baefc8..58e9c5d 100644
--- a/src/parser/rss_test.go
+++ b/src/parser/rss_test.go
@@ -115,3 +115,48 @@ func TestRSSWithLotsOfSpaces(t *testing.T) {
t.FailNow()
}
}
+
+func TestRSSPodcast(t *testing.T) { // checks that the first parsed item exposes the expected AudioURL
+ feed, _ := Parse(strings.NewReader(`
+
+
+
+ -
+
+
+
+
+ `)) // the second value returned by Parse is discarded
+ have := feed.Items[0].AudioURL // only the first item is inspected
+ want := "http://example.com/audio.ext"
+ if want != have {
+ t.Logf("want: %#v", want)
+ t.Logf("have: %#v", have)
+ t.FailNow() // stop the test once both values have been logged
+ }
+}
+
+// TestRSSPodcastDuplicated covers a case found in https://podcast.cscript.site/podcast.xml
+func TestRSSPodcastDuplicated(t *testing.T) {
+ feed, _ := Parse(strings.NewReader(`
+
+
+
+ -
+
+ ]]>
+
+
+
+
+
+ `)) // the second value returned by Parse is discarded
+ have := feed.Items[0].Content // only the first item is inspected
+ want := ``
+ if want != have {
+ t.Fatalf("content doesn't match\nwant: %#v\nhave: %#v\n", want, have)
+ }
+ if feed.Items[0].AudioURL != "" { // the audio must not be reported a second time via AudioURL
+ t.Fatal("item.audio_url must be unset if present in the content")
+ }
+}