remove feed hacks from storage

This commit is contained in:
Nazar Kanaev 2021-04-05 20:59:11 +01:00
parent 9919d72be0
commit 144fc1606a
5 changed files with 6 additions and 28 deletions

View File

@@ -81,10 +81,11 @@ func ParseAtom(r io.Reader) (*Feed, error) {
SiteURL: firstNonEmpty(srcfeed.Links.First("alternate"), srcfeed.Links.First("")),
}
for _, srcitem := range srcfeed.Entries {
link := firstNonEmpty(srcitem.OrigLink, srcitem.Links.First("alternate"), srcitem.Links.First(""))
dstfeed.Items = append(dstfeed.Items, Item{
GUID: firstNonEmpty(srcitem.ID),
GUID: firstNonEmpty(srcitem.ID, link),
Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
URL: firstNonEmpty(srcitem.OrigLink, srcitem.Links.First("alternate"), srcitem.Links.First("")),
URL: link,
Title: srcitem.Title.Text(),
Content: firstNonEmpty(srcitem.Content.String(), srcitem.Summary.String(), srcitem.firstMediaDescription()),
ImageURL: srcitem.firstMediaThumbnail(),

View File

@@ -70,6 +70,7 @@ func Parse(r io.Reader) (*Feed, error) {
func (feed *Feed) cleanup() {
feed.Title = strings.TrimSpace(feed.Title)
feed.SiteURL = strings.TrimSpace(feed.SiteURL)
for i, item := range feed.Items {
feed.Items[i].GUID = strings.TrimSpace(item.GUID)
feed.Items[i].URL = strings.TrimSpace(item.URL)

View File

@@ -46,7 +46,7 @@ func ParseJSON(data io.Reader) (*Feed, error) {
}
for _, srcitem := range srcfeed.Items {
dstfeed.Items = append(dstfeed.Items, Item{
GUID: srcitem.ID,
GUID: firstNonEmpty(srcitem.ID, srcitem.URL),
Date: dateParse(firstNonEmpty(srcitem.DatePublished, srcitem.DateModified)),
URL: srcitem.URL,
Title: srcitem.Title,

View File

@@ -2,9 +2,7 @@ package storage
import (
"database/sql"
"html"
"log"
"net/url"
)
type Feed struct {
@@ -19,22 +17,9 @@ type Feed struct {
}
func (s *Storage) CreateFeed(title, description, link, feedLink string, folderId *int64) *Feed {
title = html.UnescapeString(title)
// WILD: fallback to `feed.link` -> `feed.feed_link` -> "<???>" if title is missing
if title == "" {
title = link
// use domain if possible
linkUrl, err := url.Parse(link)
if err == nil && linkUrl.Host != "" && len(linkUrl.Path) <= 1 {
title = linkUrl.Host
}
}
if title == "" {
title = feedLink
}
if title == "" {
title = "<???>"
}
result, err := s.db.Exec(`
insert into feeds (title, description, link, feed_link, folder_id)
values (?, ?, ?, ?, ?)

View File

@@ -3,7 +3,6 @@ package storage
import (
"encoding/json"
"fmt"
"html"
"log"
"strings"
"time"
@@ -81,14 +80,6 @@ func (s *Storage) CreateItems(items []Item) bool {
now := time.Now()
for _, item := range items {
// WILD: some feeds provide only `item.date_updated` (without `item.date_created`)
if item.Date == nil {
item.Date = item.DateUpdated
}
// WILD: `item.guid` is not always present
if item.GUID == "" {
item.GUID = item.Link
}
_, err = tx.Exec(`
insert into items (
guid, feed_id, title, link, description,
@@ -99,7 +90,7 @@ func (s *Storage) CreateItems(items []Item) bool {
values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
on conflict (feed_id, guid) do update set
date_updated = ?, date_arrived = ?`,
item.GUID, item.FeedId, html.UnescapeString(item.Title), item.Link, item.Description,
item.GUID, item.FeedId, item.Title, item.Link, item.Description,
item.Content, item.Author,
item.Date, item.DateUpdated, now,
UNREAD, item.Image, item.PodcastURL,