diff --git a/src/parser/atom.go b/src/parser/atom.go index 43b7d91..b772076 100644 --- a/src/parser/atom.go +++ b/src/parser/atom.go @@ -81,10 +81,11 @@ func ParseAtom(r io.Reader) (*Feed, error) { SiteURL: firstNonEmpty(srcfeed.Links.First("alternate"), srcfeed.Links.First("")), } for _, srcitem := range srcfeed.Entries { + link := firstNonEmpty(srcitem.OrigLink, srcitem.Links.First("alternate"), srcitem.Links.First("")) dstfeed.Items = append(dstfeed.Items, Item{ - GUID: firstNonEmpty(srcitem.ID), + GUID: firstNonEmpty(srcitem.ID, link), Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)), - URL: firstNonEmpty(srcitem.OrigLink, srcitem.Links.First("alternate"), srcitem.Links.First("")), + URL: link, Title: srcitem.Title.Text(), Content: firstNonEmpty(srcitem.Content.String(), srcitem.Summary.String(), srcitem.firstMediaDescription()), ImageURL: srcitem.firstMediaThumbnail(), diff --git a/src/parser/feed.go b/src/parser/feed.go index 925d8c1..28e4d16 100644 --- a/src/parser/feed.go +++ b/src/parser/feed.go @@ -70,6 +70,7 @@ func Parse(r io.Reader) (*Feed, error) { func (feed *Feed) cleanup() { feed.Title = strings.TrimSpace(feed.Title) feed.SiteURL = strings.TrimSpace(feed.SiteURL) + for i, item := range feed.Items { feed.Items[i].GUID = strings.TrimSpace(item.GUID) feed.Items[i].URL = strings.TrimSpace(item.URL) diff --git a/src/parser/json.go b/src/parser/json.go index fd63bbd..3c8a4ad 100644 --- a/src/parser/json.go +++ b/src/parser/json.go @@ -46,7 +46,7 @@ func ParseJSON(data io.Reader) (*Feed, error) { } for _, srcitem := range srcfeed.Items { dstfeed.Items = append(dstfeed.Items, Item{ - GUID: srcitem.ID, + GUID: firstNonEmpty(srcitem.ID, srcitem.URL), Date: dateParse(firstNonEmpty(srcitem.DatePublished, srcitem.DateModified)), URL: srcitem.URL, Title: srcitem.Title, diff --git a/src/storage/feed.go b/src/storage/feed.go index 7c49a8c..ebcfe88 100644 --- a/src/storage/feed.go +++ b/src/storage/feed.go @@ -2,9 +2,7 @@ package storage import ( "database/sql" - "html" "log" - "net/url" ) type Feed struct { @@ -19,22 +17,9 @@ type Feed struct { } func (s *Storage) CreateFeed(title, description, link, feedLink string, folderId *int64) *Feed { - title = html.UnescapeString(title) - // WILD: fallback to `feed.link` -> `feed.feed_link` -> "" if title is missing - if title == "" { - title = link - // use domain if possible - linkUrl, err := url.Parse(link) - if err == nil && linkUrl.Host != "" && len(linkUrl.Path) <= 1 { - title = linkUrl.Host - } - } if title == "" { title = feedLink } - if title == "" { - title = "" - } result, err := s.db.Exec(` insert into feeds (title, description, link, feed_link, folder_id) values (?, ?, ?, ?, ?) diff --git a/src/storage/item.go b/src/storage/item.go index e0e99b3..c7ee80b 100644 --- a/src/storage/item.go +++ b/src/storage/item.go @@ -3,7 +3,6 @@ package storage import ( "encoding/json" "fmt" - "html" "log" "strings" "time" @@ -81,14 +80,6 @@ func (s *Storage) CreateItems(items []Item) bool { now := time.Now() for _, item := range items { - // WILD: some feeds provide only `item.date_updated` (without `item.date_created`) - if item.Date == nil { - item.Date = item.DateUpdated - } - // WILD: `item.guid` is not always present - if item.GUID == "" { - item.GUID = item.Link - } _, err = tx.Exec(` insert into items ( guid, feed_id, title, link, description, @@ -99,7 +90,7 @@ func (s *Storage) CreateItems(items []Item) bool { values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) on conflict (feed_id, guid) do update set date_updated = ?, date_arrived = ?`, - item.GUID, item.FeedId, html.UnescapeString(item.Title), item.Link, item.Description, + item.GUID, item.FeedId, item.Title, item.Link, item.Description, item.Content, item.Author, item.Date, item.DateUpdated, now, UNREAD, item.Image, item.PodcastURL,