fix atom links

This commit is contained in:
Nazar Kanaev
2023-09-07 18:19:17 +01:00
parent 479aebd023
commit 850ce195a0
9 changed files with 89 additions and 32 deletions

View File

@@ -81,9 +81,16 @@ func ParseAtom(r io.Reader) (*Feed, error) {
SiteURL: firstNonEmpty(srcfeed.Links.First("alternate"), srcfeed.Links.First("")),
}
for _, srcitem := range srcfeed.Entries {
link := firstNonEmpty(srcitem.OrigLink, srcitem.Links.First("alternate"), srcitem.Links.First(""))
linkFromID := ""
guidFromID := ""
if htmlutil.IsAPossibleLink(srcitem.ID) {
linkFromID = srcitem.ID
guidFromID = srcitem.ID + "::" + srcitem.Updated
}
link := firstNonEmpty(srcitem.OrigLink, srcitem.Links.First("alternate"), srcitem.Links.First(""), linkFromID)
dstfeed.Items = append(dstfeed.Items, Item{
GUID: firstNonEmpty(srcitem.ID, link),
GUID: firstNonEmpty(guidFromID, srcitem.ID, link),
Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
URL: link,
Title: srcitem.Title.Text(),

View File

@@ -131,3 +131,48 @@ func TestAtomImageLinkDuplicated(t *testing.T) {
t.Fatal("item.image_url must be unset if present in the content")
}
}
func TestAtomLinkInID(t *testing.T) {
feed, _ := Parse(strings.NewReader(`
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
<entry>
<title>one updated</title>
<id>https://example.com/posts/1</id>
<updated>2003-12-13T09:17:51</updated>
</entry>
<entry>
<title>two</title>
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
</entry>
<entry>
<title>one</title>
<id>https://example.com/posts/1</id>
</entry>
</feed>
`))
have := feed.Items
want := []Item{
Item{
GUID: "https://example.com/posts/1::2003-12-13T09:17:51",
Date: time.Date(2003, time.December, 13, 9, 17, 51, 0, time.UTC),
URL: "https://example.com/posts/1",
Title: "one updated",
},
Item{
GUID: "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6",
Date: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC), URL: "",
Title: "two",
},
Item{
GUID: "https://example.com/posts/1::",
Date: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC),
URL: "https://example.com/posts/1",
Title: "one",
Content: "",
},
}
if !reflect.DeepEqual(want, have) {
t.Fatalf("\nwant: %#v\nhave: %#v\n", want, have)
}
}

View File

@@ -20,7 +20,7 @@ type rssFeed struct {
}
type rssItem struct {
GUID rssGuid `xml:"guid"`
GUID rssGuid `xml:"guid"`
Title string `xml:"title"`
Link string `xml:"rss link"`
Description string `xml:"rss description"`
@@ -86,10 +86,10 @@ func ParseRSS(r io.Reader) (*Feed, error) {
}
}
permalink := ""
if srcitem.GUID.IsPermaLink == "true" {
permalink = srcitem.GUID.GUID
}
permalink := ""
if srcitem.GUID.IsPermaLink == "true" {
permalink = srcitem.GUID.GUID
}
dstfeed.Items = append(dstfeed.Items, Item{
GUID: firstNonEmpty(srcitem.GUID.GUID, srcitem.Link),

View File

@@ -217,11 +217,11 @@ func TestRSSIsPermalink(t *testing.T) {
`))
have := feed.Items
want := []Item{
{
GUID: "http://example.com/posts/1",
URL: "http://example.com/posts/1",
},
}
{
GUID: "http://example.com/posts/1",
URL: "http://example.com/posts/1",
},
}
for i := 0; i < len(want); i++ {
if want[i] != have[i] {
t.Errorf("Failed to handle isPermalink\nwant: %#v\nhave: %#v\n", want[i], have[i])