unset audio/image if present in the content

This commit is contained in:
Nazar Kanaev 2021-04-04 21:31:25 +01:00
parent 0828d6782e
commit 63ad971890
4 changed files with 91 additions and 3 deletions

View File

@ -93,3 +93,41 @@ func TestAtomHTMLTitle(t *testing.T) {
t.FailNow() t.FailNow()
} }
} }
// TestAtomImageLink checks that the url attribute of an Atom entry's
// <media:thumbnail> element is exposed as the parsed item's ImageURL.
func TestAtomImageLink(t *testing.T) {
	feed, err := Parse(strings.NewReader(`
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
<entry>
<media:thumbnail url="https://example.com/image.png?width=100&height=100" />
</entry>
</feed>
`))
	// Report a parse failure as a test failure instead of letting the
	// item lookup below panic on a nil feed or an empty item list.
	if err != nil {
		t.Fatalf("unexpected parse error: %v", err)
	}
	if feed == nil || len(feed.Items) == 0 {
		t.Fatal("parsed feed has no items")
	}

	have := feed.Items[0].ImageURL
	want := `https://example.com/image.png?width=100&height=100`
	if want != have {
		t.Fatalf("item.image_url doesn't match\nwant: %#v\nhave: %#v\n", want, have)
	}
}
// TestAtomImageLinkDuplicated checks that an item's ImageURL is left empty
// when the thumbnail URL already appears inside the item's HTML content,
// and that the content itself is returned with its entities decoded.
//
// Found in: https://www.reddit.com/r/funny.rss
// Items come with thumbnail urls which are also present in the content.
func TestAtomImageLinkDuplicated(t *testing.T) {
	feed, err := Parse(strings.NewReader(`
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
<entry>
<content type="html">&lt;img src="https://example.com/image.png?width=100&amp;height=100"&gt;</content>
<media:thumbnail url="https://example.com/image.png?width=100&height=100" />
</entry>
</feed>
`))
	// Report a parse failure as a test failure instead of letting the
	// item lookup below panic on a nil feed or an empty item list.
	if err != nil {
		t.Fatalf("unexpected parse error: %v", err)
	}
	if feed == nil || len(feed.Items) == 0 {
		t.Fatal("parsed feed has no items")
	}

	item := feed.Items[0]
	want := `<img src="https://example.com/image.png?width=100&height=100">`
	if have := item.Content; want != have {
		t.Fatalf("want: %#v\nhave: %#v\n", want, have)
	}
	if item.ImageURL != "" {
		t.Fatal("item.image_url must be unset if present in the content")
	}
}

View File

@ -75,6 +75,13 @@ func (feed *Feed) cleanup() {
feed.Items[i].URL = strings.TrimSpace(item.URL) feed.Items[i].URL = strings.TrimSpace(item.URL)
feed.Items[i].Title = strings.TrimSpace(item.Title) feed.Items[i].Title = strings.TrimSpace(item.Title)
feed.Items[i].Content = strings.TrimSpace(item.Content) feed.Items[i].Content = strings.TrimSpace(item.Content)
if item.ImageURL != "" && strings.Contains(item.Content, item.ImageURL) {
feed.Items[i].ImageURL = ""
}
if item.AudioURL != "" && strings.Contains(item.Content, item.AudioURL) {
feed.Items[i].AudioURL = ""
}
} }
} }

View File

@ -77,11 +77,9 @@ func ParseRSS(r io.Reader) (*Feed, error) {
if e.Type == "audio/mpeg" || e.Type == "audio/x-m4a" { if e.Type == "audio/mpeg" || e.Type == "audio/x-m4a" {
podcastURL = e.URL podcastURL = e.URL
origBase := path.Base(srcitem.OrigEnclosureLink) if srcitem.OrigEnclosureLink != "" && strings.Contains(podcastURL, path.Base(srcitem.OrigEnclosureLink)) {
if origBase != "" && strings.Contains(podcastURL, origBase) {
podcastURL = srcitem.OrigEnclosureLink podcastURL = srcitem.OrigEnclosureLink
} }
break break
} }
} }

View File

@ -115,3 +115,48 @@ func TestRSSWithLotsOfSpaces(t *testing.T) {
t.FailNow() t.FailNow()
} }
} }
// TestRSSPodcast checks that the url of an RSS item's audio <enclosure>
// (type "audio/x-m4a" here) is exposed as the parsed item's AudioURL.
func TestRSSPodcast(t *testing.T) {
	feed, err := Parse(strings.NewReader(`
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<item>
<enclosure length="100500" type="audio/x-m4a" url="http://example.com/audio.ext"/>
</item>
</channel>
</rss>
`))
	// Report a parse failure as a test failure instead of letting the
	// item lookup below panic on a nil feed or an empty item list.
	if err != nil {
		t.Fatalf("unexpected parse error: %v", err)
	}
	if feed == nil || len(feed.Items) == 0 {
		t.Fatal("parsed feed has no items")
	}

	have := feed.Items[0].AudioURL
	want := "http://example.com/audio.ext"
	if want != have {
		t.Fatalf("item.audio_url doesn't match\nwant: %#v\nhave: %#v\n", want, have)
	}
}
// TestRSSPodcastDuplicated checks that an item's AudioURL is left empty
// when the enclosure URL already appears inside the item's content, and
// that the content is returned with surrounding whitespace trimmed.
//
// Found in: https://podcast.cscript.site/podcast.xml
func TestRSSPodcastDuplicated(t *testing.T) {
	feed, err := Parse(strings.NewReader(`
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<item>
<content:encoded>
<![CDATA[ <audio src="http://example.com/audio.ext"></audio> ]]>
</content:encoded>
<enclosure length="100500" type="audio/x-m4a" url="http://example.com/audio.ext"/>
</item>
</channel>
</rss>
`))
	// Report a parse failure as a test failure instead of letting the
	// item lookup below panic on a nil feed or an empty item list.
	if err != nil {
		t.Fatalf("unexpected parse error: %v", err)
	}
	if feed == nil || len(feed.Items) == 0 {
		t.Fatal("parsed feed has no items")
	}

	item := feed.Items[0]
	want := `<audio src="http://example.com/audio.ext"></audio>`
	if have := item.Content; want != have {
		t.Fatalf("content doesn't match\nwant: %#v\nhave: %#v\n", want, have)
	}
	if item.AudioURL != "" {
		t.Fatal("item.audio_url must be unset if present in the content")
	}
}