diff --git a/src/content/htmlutil/urlutils.go b/src/content/htmlutil/urlutils.go index f5c02ac..f3af764 100644 --- a/src/content/htmlutil/urlutils.go +++ b/src/content/htmlutil/urlutils.go @@ -2,6 +2,7 @@ package htmlutil import ( "net/url" + "strings" ) func Any(els []string, el string, match func(string, string) bool) bool { @@ -31,3 +32,7 @@ func URLDomain(val string) string { } return val } + +func IsAPossibleLink(val string) bool { + return strings.HasPrefix(val, "http://") || strings.HasPrefix(val, "https://") +} diff --git a/src/parser/atom.go b/src/parser/atom.go index b772076..361c747 100644 --- a/src/parser/atom.go +++ b/src/parser/atom.go @@ -81,9 +81,16 @@ func ParseAtom(r io.Reader) (*Feed, error) { SiteURL: firstNonEmpty(srcfeed.Links.First("alternate"), srcfeed.Links.First("")), } for _, srcitem := range srcfeed.Entries { - link := firstNonEmpty(srcitem.OrigLink, srcitem.Links.First("alternate"), srcitem.Links.First("")) + linkFromID := "" + guidFromID := "" + if htmlutil.IsAPossibleLink(srcitem.ID) { + linkFromID = srcitem.ID + guidFromID = srcitem.ID + "::" + srcitem.Updated + } + + link := firstNonEmpty(srcitem.OrigLink, srcitem.Links.First("alternate"), srcitem.Links.First(""), linkFromID) dstfeed.Items = append(dstfeed.Items, Item{ - GUID: firstNonEmpty(srcitem.ID, link), + GUID: firstNonEmpty(guidFromID, srcitem.ID, link), Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)), URL: link, Title: srcitem.Title.Text(), diff --git a/src/parser/atom_test.go b/src/parser/atom_test.go index fa01297..1b81a49 100644 --- a/src/parser/atom_test.go +++ b/src/parser/atom_test.go @@ -131,3 +131,48 @@ func TestAtomImageLinkDuplicated(t *testing.T) { t.Fatal("item.image_url must be unset if present in the content") } } + +func TestAtomLinkInID(t *testing.T) { + feed, _ := Parse(strings.NewReader(` + + + + one updated + https://example.com/posts/1 + 2003-12-13T09:17:51 + + + two + urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6 + + + one + https://example.com/posts/1 + + + `)) + have := feed.Items + want := []Item{ + Item{ + GUID: "https://example.com/posts/1::2003-12-13T09:17:51", + Date: time.Date(2003, time.December, 13, 9, 17, 51, 0, time.UTC), + URL: "https://example.com/posts/1", + Title: "one updated", + }, + Item{ + GUID: "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6", + Date: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC), URL: "", + Title: "two", + }, + Item{ + GUID: "https://example.com/posts/1::", + Date: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC), + URL: "https://example.com/posts/1", + Title: "one", + Content: "", + }, + } + if !reflect.DeepEqual(want, have) { + t.Fatalf("\nwant: %#v\nhave: %#v\n", want, have) + } +} diff --git a/src/parser/rss.go b/src/parser/rss.go index 9cf0773..22090db 100644 --- a/src/parser/rss.go +++ b/src/parser/rss.go @@ -20,7 +20,7 @@ type rssFeed struct { } type rssItem struct { - GUID rssGuid `xml:"guid"` + GUID rssGuid `xml:"guid"` Title string `xml:"title"` Link string `xml:"rss link"` Description string `xml:"rss description"` @@ -86,10 +86,10 @@ func ParseRSS(r io.Reader) (*Feed, error) { } } - permalink := "" - if srcitem.GUID.IsPermaLink == "true" { - permalink = srcitem.GUID.GUID - } + permalink := "" + if srcitem.GUID.IsPermaLink == "true" { + permalink = srcitem.GUID.GUID + } dstfeed.Items = append(dstfeed.Items, Item{ GUID: firstNonEmpty(srcitem.GUID.GUID, srcitem.Link), diff --git a/src/parser/rss_test.go b/src/parser/rss_test.go index 1680805..e1d5e67 100644 --- a/src/parser/rss_test.go +++ b/src/parser/rss_test.go @@ -217,11 +217,11 @@ func TestRSSIsPermalink(t *testing.T) { `)) have := feed.Items want := []Item{ - { - GUID: "http://example.com/posts/1", - URL: "http://example.com/posts/1", - }, - } + { + GUID: "http://example.com/posts/1", + URL: "http://example.com/posts/1", + }, + } for i := 0; i < len(want); i++ { if want[i] != have[i] { t.Errorf("Failed to handle isPermalink\nwant: %#v\nhave: %#v\n", want[i], have[i]) diff --git a/src/server/routes.go b/src/server/routes.go index 8166328..7c9c35b 100644 --- a/src/server/routes.go +++ b/src/server/routes.go @@ -314,12 +314,12 @@ func (s *Server) handleItem(c *router.Context) { return } - // runtime fix for relative links - if !strings.HasPrefix(item.Link, "http") { - if feed := s.db.GetFeed(item.FeedId); feed != nil { - item.Link = htmlutil.AbsoluteUrl(item.Link, feed.Link) - } - } + // runtime fix for relative links + if !htmlutil.IsAPossibleLink(item.Link) { + if feed := s.db.GetFeed(item.FeedId); feed != nil { + item.Link = htmlutil.AbsoluteUrl(item.Link, feed.Link) + } + } item.Content = sanitizer.Sanitize(item.Link, item.Content) diff --git a/src/storage/feed.go b/src/storage/feed.go index b77abc5..1be534f 100644 --- a/src/storage/feed.go +++ b/src/storage/feed.go @@ -29,12 +29,12 @@ func (s *Storage) CreateFeed(title, description, link, feedLink string, folderId folderId, ) - var id int64 - err := row.Scan(&id) - if err != nil { - log.Print(err) - return nil - } + var id int64 + err := row.Scan(&id) + if err != nil { + log.Print(err) + return nil + } return &Feed{ Id: id, Title: title, diff --git a/src/storage/feed_test.go b/src/storage/feed_test.go index 2492757..33aee7c 100644 --- a/src/storage/feed_test.go +++ b/src/storage/feed_test.go @@ -24,13 +24,13 @@ func TestCreateFeedSameLink(t *testing.T) { t.Fatal("expected feed") } - for i := 0; i < 10; i++ { - db.CreateFeed("title", "", "", "http://example2.com/feed.xml", nil) - } + for i := 0; i < 10; i++ { + db.CreateFeed("title", "", "", "http://example2.com/feed.xml", nil) + } feed2 := db.CreateFeed("title", "", "http://example.com", "http://example1.com/feed.xml", nil) if feed1.Id != feed2.Id { - t.Fatalf("expected the same feed.\nwant: %#v\nhave: %#v", feed1, feed2) + t.Fatalf("expected the same feed.\nwant: %#v\nhave: %#v", feed1, feed2) } } diff --git a/src/storage/folder.go b/src/storage/folder.go index eb31ecb..27c33d5 100644 --- a/src/storage/folder.go +++ b/src/storage/folder.go @@ -17,11 +17,11 @@ func (s *Storage) CreateFolder(title string) *Folder { on conflict (title) do update set title = ? returning id`, title, expanded, - // provide title again so that we can extract row id - title, + // provide title again so that we can extract row id + title, ) - var id int64 - err := row.Scan(&id) + var id int64 + err := row.Scan(&id) if err != nil { log.Print(err)