49 Commits
v2.1 ... v2.3

Author SHA1 Message Date
Nazar Kanaev
d2c034a850 v2.3 2022-05-03 20:40:39 +01:00
Nazar Kanaev
713930decc update changelog 2022-05-03 15:45:22 +01:00
Nazar Kanaev
ee2a825cf0 get rss link when atom link is present
found in: https://rss.nytimes.com/services/xml/rss/nyt/Arts.xml

when both rss and atom link elements are present, the xml parser returns an
empty string. provide the default namespace to capture the rss link properly.
2022-05-03 15:35:57 +01:00
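
A minimal sketch of the namespace issue described in this commit (illustrative only, not the project's exact parser code; the sample feed is made up): with both <link> and <atom:link> in scope, an untagged xml:"link" field can bind to the Atom element, which carries its value in an href attribute rather than character data, leaving the field empty. Setting Decoder.DefaultSpace and qualifying the struct tag pins the field to the plain RSS <link>:

package main

import (
	"encoding/xml"
	"fmt"
	"strings"
)

type channel struct {
	// qualified with the default ("rss") namespace, so <atom:link> no longer matches
	Link string `xml:"rss link"`
}

func main() {
	data := `<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
	  <channel>
	    <atom:link href="https://example.com/feed.xml" rel="self"/>
	    <link>https://example.com/</link>
	  </channel>
	</rss>`

	var doc struct {
		Channel channel `xml:"rss channel"`
	}
	dec := xml.NewDecoder(strings.NewReader(data))
	// treat elements without a namespace as belonging to the "rss" space
	dec.DefaultSpace = "rss"
	if err := dec.Decode(&doc); err != nil {
		panic(err)
	}
	fmt.Println(doc.Channel.Link) // prints https://example.com/
}
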
Nazar Kanaev
8e9da86f83 nah 2022-04-09 16:06:19 +01:00
Nazar Kanaev
9eb49fd3a7 credits 2022-04-09 16:03:46 +01:00
Nazar Kanaev
684bc25b83 fix: load more items to prevent scroll lock 2022-04-09 15:58:33 +01:00
nkanaev
8ceab03cd7 fix text color in dark mode 2022-03-01 10:43:25 +00:00
Nazar Kanaev
34dad4ac8f systray: fix build flag 2022-02-16 14:04:04 +00:00
Nazar Kanaev
b40d930f8a credits 2022-02-15 22:12:32 +00:00
Nazar Kanaev
d4b34e900e update test 2022-02-15 22:04:16 +00:00
Nazar Kanaev
954b549029 update 2022-02-15 21:56:32 +00:00
Nazar Kanaev
fbd0b2310e update changelog 2022-02-14 20:33:28 +00:00
Nazar Kanaev
be7af0ccaf handle invalid chars in non-utf8 xml 2022-02-14 15:23:55 +00:00
Nazar Kanaev
18221ef12d use bytes.Buffer instead 2022-02-14 11:05:38 +00:00
Nazar Kanaev
4c0726412b do not build systray in linux 2022-02-14 00:56:03 +00:00
Nazar Kanaev
d7253a60b8 strip out invalid xml characters 2022-02-12 23:42:44 +00:00
Nazar Kanaev
2de3ddff08 fix test 2022-02-12 23:41:01 +00:00
Nazar Kanaev
830248b6ae store feed size 2022-02-10 22:14:47 +00:00
Nazar Kanaev
f8db2ef7ad delete old items based on feed size 2022-02-10 22:14:47 +00:00
Nazar Kanaev
109caaa889 cascade 2022-02-10 22:14:47 +00:00
Nazar Kanaev
d0b83babd2 initial work for smarter database cleanup 2022-02-10 22:14:47 +00:00
nkanaev
de3decbffd remove unused assets 2022-01-26 10:44:33 +00:00
nkanaev
c92229a698 update changelog 2022-01-24 16:56:55 +00:00
nkanaev
176852b662 credits 2022-01-24 16:52:29 +00:00
nkanaev
52cc8ecbbd fix encoding 2022-01-24 16:47:32 +00:00
nkanaev
e3e9542f1e fix page crawling encoding 2022-01-24 14:02:21 +00:00
nkanaev
b78c8bf8bf fix parsing opml with encoding 2022-01-24 13:10:30 +00:00
nkanaev
bff7476b58 refactoring 2022-01-24 12:50:52 +00:00
Nazar Kanaev
05f5785660 update promo.png 2022-01-18 14:36:05 +00:00
David Adi Nugroho
cb50aed89a add placeholder and autofocus to new feed url 2021-12-28 17:56:26 +00:00
Nazar Kanaev
df655aca5e remove todo 2021-11-20 22:42:33 +00:00
Nazar Kanaev
86853a87bf update changelog 2021-11-20 22:07:33 +00:00
Nazar Kanaev
e3109a4384 v2.2 2021-11-20 22:01:05 +00:00
Nazar Kanaev
eee8002d69 do not show loading icon after marking all articles read 2021-11-20 21:34:58 +00:00
Nazar Kanaev
92f11f7513 cleanup gitignore 2021-11-20 21:22:21 +00:00
nkanaev
5428e6be3a update changelog 2021-11-17 10:52:01 +00:00
Nazar Kanaev
1ad693f931 make selected feed/folder always visible 2021-11-11 22:04:27 +00:00
Nazar Kanaev
c2d88a7e3f update promo 2021-11-11 21:45:09 +00:00
nkanaev
3b29d737eb move theme selector to the main settings menu 2021-11-11 13:33:22 +00:00
nkanaev
fe178b8fc6 nope 2021-11-11 13:14:23 +00:00
nkanaev
cca742a1c2 run windows console fix 2021-11-11 09:51:56 +00:00
nkanaev
c7eddff118 make feed/folder settings available in all filter modes 2021-11-10 11:19:14 +00:00
nkanaev
cf30ed249f windows console fix 2021-11-10 11:06:30 +00:00
nkanaev
26b87dee98 remove html tags from titles 2021-11-10 10:54:12 +00:00
Karol Kosek
77c7f938f1 Autoselect current folder when adding a new feed
This patch makes categorising new feeds a bit more intuitive:
the selected folder (or feed within a folder) in the feed list
will automatically be selected when adding a new feed.
2021-11-10 10:20:14 +00:00
Nazar Kanaev
f98de9a0a5 update todo 2021-11-08 11:27:51 +00:00
Nazar Kanaev
6fa2b67024 todo 2021-10-25 16:24:18 +01:00
Nazar Kanaev
355e5feb62 update asset names 2021-08-16 12:57:28 +01:00
Nazar Kanaev
a7dd707062 update changelog 2021-08-16 12:56:59 +01:00
41 changed files with 785 additions and 108 deletions

View File

@@ -131,7 +131,7 @@ jobs:
with:
upload_url: ${{ steps.create_release.outputs.upload_url }}
asset_path: ./yarr-windows.zip
asset_name: yarr-${{ github.ref }}-windows32.zip
asset_name: yarr-${{ github.ref }}-windows64.zip
asset_content_type: application/zip
- name: Upload Linux
uses: actions/upload-release-asset@v1
@@ -140,5 +140,5 @@ jobs:
with:
upload_url: ${{ steps.create_release.outputs.upload_url }}
asset_path: ./yarr-linux.zip
asset_name: yarr-${{ github.ref }}-linux32.zip
asset_name: yarr-${{ github.ref }}-linux64.zip
asset_content_type: application/zip

.gitignore (2 changes)
View File

@@ -1,5 +1,3 @@
/server/assets.go
/gofeed
/_output
/yarr
*.db

View File

@@ -1,5 +1,21 @@
# upcoming
- (fix) handling encodings (thanks to @f100024 & @fserb)
- (fix) parsing xml feeds with illegal characters (thanks to @stepelu for the report)
- (fix) old articles reappearing as unread (thanks to @adaszko for the report)
- (fix) item list scrolling issue on large screens (thanks to @bielej for the report)
- (fix) keyboard shortcuts color in dark mode (thanks to @John09f9 for the report)
- (etc) autofocus when adding a new feed (thanks to @lakuapik)
# v2.2 (2021-11-20)
- (fix) windows console support (thanks to @dufferzafar for the report)
- (fix) remove html tags from article titles (thanks to Alex Went for the report)
- (etc) autoselect current folder when adding a new feed (thanks to @krkk)
- (etc) folder/feed settings menu available across all filters
# v2.1 (2021-08-16)
- (new) configuration via env variables
- (fix) missing `content-type` headers (thanks to @verahawk for the report)
- (fix) handle opml files not following the spec (thanks to @huangnauh for the report)

View File

@@ -20,3 +20,8 @@ The licenses are included, and the authorship comments are left intact.
https://github.com/getlantern/systray (commit:2c0986d) Apache 2.0
removed golog dependency
- fixconsole
https://github.com/apenwarr/fixconsole (commit:5a9f648) Apache 2.0
removed `w32` dependency

Binary file not shown. Before: 727 KiB, after: 223 KiB.

View File

@@ -1,4 +1,4 @@
VERSION=2.1
VERSION=2.3
GITHASH=$(shell git rev-parse --short=8 HEAD)
CGO_ENABLED=1

View File

@@ -6,6 +6,7 @@ import (
"io"
"io/fs"
"io/ioutil"
"log"
"os"
)
@@ -29,9 +30,18 @@ func Template(path string) *template.Template {
if !found {
tmpl = template.Must(template.New(path).Delims("{%", "%}").Funcs(template.FuncMap{
"inline": func(svg string) template.HTML {
svgfile, _ := FS.Open("graphicarts/" + svg)
content, _ := ioutil.ReadAll(svgfile)
svgfile.Close()
svgfile, err := FS.Open("graphicarts/" + svg)
// should never happen
if err != nil {
log.Fatal(err)
}
defer svgfile.Close()
content, err := ioutil.ReadAll(svgfile)
// should never happen
if err != nil {
log.Fatal(err)
}
return template.HTML(content)
},
}).ParseFS(FS, path))

View File

@@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-chevron-down"><polyline points="6 9 12 15 18 9"></polyline></svg>

View File

@@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-list"><line x1="8" y1="6" x2="21" y2="6"></line><line x1="8" y1="12" x2="21" y2="12"></line><line x1="8" y1="18" x2="21" y2="18"></line><line x1="3" y1="6" x2="3.01" y2="6"></line><line x1="3" y1="12" x2="3.01" y2="12"></line><line x1="3" y1="18" x2="3.01" y2="18"></line></svg>

View File

@@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-menu"><line x1="3" y1="12" x2="21" y2="12"></line><line x1="3" y1="6" x2="21" y2="6"></line><line x1="3" y1="18" x2="21" y2="18"></line></svg>

View File

@@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-more-vertical"><circle cx="12" cy="12" r="1"></circle><circle cx="12" cy="5" r="1"></circle><circle cx="12" cy="19" r="1"></circle></svg>

View File

@@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-settings"><circle cx="12" cy="12" r="3"></circle><path d="M19.4 15a1.65 1.65 0 0 0 .33 1.82l.06.06a2 2 0 0 1 0 2.83 2 2 0 0 1-2.83 0l-.06-.06a1.65 1.65 0 0 0-1.82-.33 1.65 1.65 0 0 0-1 1.51V21a2 2 0 0 1-2 2 2 2 0 0 1-2-2v-.09A1.65 1.65 0 0 0 9 19.4a1.65 1.65 0 0 0-1.82.33l-.06.06a2 2 0 0 1-2.83 0 2 2 0 0 1 0-2.83l.06-.06a1.65 1.65 0 0 0 .33-1.82 1.65 1.65 0 0 0-1.51-1H3a2 2 0 0 1-2-2 2 2 0 0 1 2-2h.09A1.65 1.65 0 0 0 4.6 9a1.65 1.65 0 0 0-.33-1.82l-.06-.06a2 2 0 0 1 0-2.83 2 2 0 0 1 2.83 0l.06.06a1.65 1.65 0 0 0 1.82.33H9a1.65 1.65 0 0 0 1-1.51V3a2 2 0 0 1 2-2 2 2 0 0 1 2 2v.09a1.65 1.65 0 0 0 1 1.51 1.65 1.65 0 0 0 1.82-.33l.06-.06a2 2 0 0 1 2.83 0 2 2 0 0 1 0 2.83l-.06.06a1.65 1.65 0 0 0-.33 1.82V9a1.65 1.65 0 0 0 1.51 1H21a2 2 0 0 1 2 2 2 2 0 0 1-2 2h-.09a1.65 1.65 0 0 0-1.51 1z"></path></svg>

View File

@@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-trash-2"><polyline points="3 6 5 6 21 6"></polyline><path d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6m3 0V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2"></path><line x1="10" y1="11" x2="10" y2="17"></line><line x1="14" y1="11" x2="14" y2="17"></line></svg>

View File

@@ -57,6 +57,18 @@
<div class="dropdown-divider"></div>
<header class="dropdown-header">Theme</header>
<div class="row text-center m-0">
<button class="btn btn-link col-4 px-0 rounded-0"
:class="'theme-'+t"
@click.stop="theme.name = t"
v-for="t in ['light', 'sepia', 'night']">
<span class="icon" v-if="theme.name == t">{% inline "check.svg" %}</span>
</button>
</div>
<div class="dropdown-divider"></div>
<header class="dropdown-header">Auto Refresh</header>
<div class="row text-center m-0">
<button class="dropdown-item col-4 px-0" :class="{active: !refreshRate}" @click.stop="refreshRate = 0">0</button>
@@ -117,6 +129,7 @@
<div v-for="folder in foldersWithFeeds">
<label class="selectgroup mt-1"
:class="{'d-none': filterSelected
&& !(current.folder.id == folder.id || current.feed.folder_id == folder.id)
&& !filteredFolderStats[folder.id]
&& (!itemSelectedDetails || feedsById[itemSelectedDetails.feed_id].folder_id != folder.id)}">
<input type="radio" name="feed" :value="'folder:'+folder.id" v-model="feedSelected">
@@ -133,6 +146,7 @@
<div v-show="!folder.id || folder.is_expanded" class="mt-1" :class="{'pl-3': folder.id}">
<label class="selectgroup"
:class="{'d-none': filterSelected
&& !(current.feed.id == feed.id)
&& !filteredFeedStats[feed.id]
&& (!itemSelectedDetails || itemSelectedDetails.feed_id != feed.id)}"
v-for="feed in folder.feeds">
@@ -177,11 +191,16 @@
title="Mark All Read">
<span class="icon">{% inline "check.svg" %}</span>
</button>
<button class="btn btn-link toolbar-item px-2 ml-2" v-if="!current.type" disabled>
<span class="icon">{% inline "more-horizontal.svg" %}</span>
</button>
<dropdown class="settings-dropdown"
toggle-class="btn btn-link toolbar-item px-2 ml-2"
drop="right"
title="Feed Settings"
v-if="!filterSelected && current.type == 'feed'">
v-if="current.type == 'feed'">
<template v-slot:button>
<span class="icon">{% inline "more-horizontal.svg" %}</span>
</template>
@@ -226,7 +245,7 @@
toggle-class="btn btn-link toolbar-item px-2 ml-2"
title="Folder Settings"
drop="right"
v-if="!filterSelected && current.type == 'folder'">
v-if="current.type == 'folder'">
<template v-slot:button>
<span class="icon">{% inline "more-horizontal.svg" %}</span>
</template>
@@ -285,14 +304,6 @@
<template v-slot:button>
<span class="icon">{% inline "sliders.svg" %}</span>
</template>
<div class="row text-center m-0">
<button class="btn btn-link col-4 px-0 rounded-0"
:class="'theme-'+t"
@click.stop="theme.name = t"
v-for="t in ['light', 'sepia', 'night']">
<span class="icon" v-if="theme.name == t">{% inline "check.svg" %}</span>
</button>
</div>
<button class="dropdown-item" :class="{active: !theme.font}" @click.stop="theme.font = ''">sans-serif</button>
<button class="dropdown-item font-serif" :class="{active: theme.font == 'serif'}" @click.stop="theme.font = 'serif'">serif</button>
@@ -343,14 +354,14 @@
<p class="cursor-default"><b>New Feed</b></p>
<form action="" @submit.prevent="createFeed(event)" class="mt-4">
<label for="feed-url">URL</label>
<input id="feed-url" name="url" type="url" class="form-control" required autocomplete="off" :readonly="feedNewChoice.length > 0">
<input id="feed-url" name="url" type="url" class="form-control" required autocomplete="off" :readonly="feedNewChoice.length > 0" placeholder="https://example.com/feed" v-focus>
<label for="feed-folder" class="mt-3 d-block">
Folder
<a href="#" class="float-right text-decoration-none" @click.prevent="createNewFeedFolder()">new folder</a>
</label>
<select class="form-control" id="feed-folder" name="folder_id" ref="newFeedFolder">
<option value="">---</option>
<option :value="folder.id" v-for="folder in folders">{{ folder.title }}</option>
<option :value="folder.id" v-for="folder in folders" :selected="folder.id === current.feed.folder_id || folder.id === current.folder.id">{{ folder.title }}</option>
</select>
<div class="mt-4" v-if="feedNewChoice.length">
<p class="mb-2">

View File

@@ -21,6 +21,12 @@ Vue.directive('scroll', {
},
})
Vue.directive('focus', {
inserted: function(el) {
el.focus()
}
})
Vue.component('drag', {
props: ['width'],
template: '<div class="drag"></div>',
@@ -413,7 +419,7 @@ var vm = new Vue({
}
this.loading.items = true
return api.items.list(query).then(function(data) {
api.items.list(query).then(function(data) {
if (loadMore) {
vm.items = vm.items.concat(data.list)
} else {
@@ -421,14 +427,24 @@ var vm = new Vue({
}
vm.itemsHasMore = data.has_more
vm.loading.items = false
// load more if there's some space left at the bottom of the item list.
vm.$nextTick(function() {
if (vm.itemsHasMore && !vm.loading.items && vm.itemListCloseToBottom()) {
vm.refreshItems(true)
}
})
})
},
itemListCloseToBottom: function() {
var el = this.$refs.itemlist
var closeToBottom = (el.scrollHeight - el.scrollTop - el.offsetHeight) < 50
return closeToBottom
},
loadMoreItems: function(event, el) {
if (!this.itemsHasMore) return
if (this.loading.items) return
var closeToBottom = (el.scrollHeight - el.scrollTop - el.offsetHeight) < 50
if (closeToBottom) this.refreshItems(true)
if (this.itemListCloseToBottom()) this.refreshItems(true)
},
markItemsRead: function() {
var query = this.getItemsQuery()
@@ -436,6 +452,7 @@ var vm = new Vue({
vm.items = []
vm.itemsPage = {'cur': 1, 'num': 1}
vm.itemSelected = null
vm.itemsHasMore = false
vm.refreshStats()
})
},

View File

@@ -85,6 +85,10 @@ select.form-control:not([multiple]):not([size]) {
outline: none;
}
.table-compact {
color: unset !important;
}
.table-compact tr td:first-child {
padding-left: 0;
}

View File

@@ -29,6 +29,8 @@ func opt(envVar, defaultValue string) string {
}
func main() {
platform.FixConsoleIfNeeded()
var addr, db, authfile, certfile, keyfile, basepath, logfile string
var ver, open bool

View File

@@ -9,18 +9,25 @@ import (
"net/url"
"strings"
"time"
"github.com/nkanaev/yarr/src/content/htmlutil"
"golang.org/x/net/html/charset"
)
var UnknownFormat = errors.New("unknown feed format")
type processor func(r io.Reader) (*Feed, error)
type feedProbe struct {
feedType string
callback func(r io.Reader) (*Feed, error)
encoding string
}
func sniff(lookup string) (string, processor) {
func sniff(lookup string) (out feedProbe) {
lookup = strings.TrimSpace(lookup)
lookup = strings.TrimLeft(lookup, "\x00\xEF\xBB\xBF\xFE\xFF")
if len(lookup) < 0 {
return "", nil
if len(lookup) == 0 {
return
}
switch lookup[0] {
@@ -31,24 +38,42 @@ func sniff(lookup string) (string, processor) {
if token == nil {
break
}
// check <?xml encoding="ENCODING" ?>
if el, ok := token.(xml.ProcInst); ok && el.Target == "xml" {
out.encoding = strings.ToLower(procInst("encoding", string(el.Inst)))
}
if el, ok := token.(xml.StartElement); ok {
switch el.Name.Local {
case "rss":
return "rss", ParseRSS
out.feedType = "rss"
out.callback = ParseRSS
return
case "RDF":
return "rdf", ParseRDF
out.feedType = "rdf"
out.callback = ParseRDF
return
case "feed":
return "atom", ParseAtom
out.feedType = "atom"
out.callback = ParseAtom
return
}
}
}
case '{':
return "json", ParseJSON
out.feedType = "json"
out.callback = ParseJSON
return
}
return "", nil
return
}
func Parse(r io.Reader) (*Feed, error) {
return ParseWithEncoding(r, "")
}
func ParseWithEncoding(r io.Reader, fallbackEncoding string) (*Feed, error) {
lookup := make([]byte, 2048)
n, err := io.ReadFull(r, lookup)
switch {
@@ -61,18 +86,42 @@ func Parse(r io.Reader) (*Feed, error) {
r = io.MultiReader(bytes.NewReader(lookup), r)
}
_, callback := sniff(string(lookup))
if callback == nil {
out := sniff(string(lookup))
if out.feedType == "" {
return nil, UnknownFormat
}
feed, err := callback(r)
if out.encoding == "" && fallbackEncoding != "" {
r, err = charset.NewReaderLabel(fallbackEncoding, r)
if err != nil {
return nil, err
}
}
if (out.feedType != "json") && (out.encoding == "" || out.encoding == "utf-8") {
// XML decoder will not rely on custom CharsetReader (see `xmlDecoder`)
// to handle invalid xml characters.
// Assume input is already UTF-8 and do the cleanup here.
r = NewSafeXMLReader(r)
}
feed, err := out.callback(r)
if feed != nil {
feed.cleanup()
}
return feed, err
}
func ParseAndFix(r io.Reader, baseURL, fallbackEncoding string) (*Feed, error) {
feed, err := ParseWithEncoding(r, fallbackEncoding)
if err != nil {
return nil, err
}
feed.TranslateURLs(baseURL)
feed.SetMissingDatesTo(time.Now())
return feed, nil
}
func (feed *Feed) cleanup() {
feed.Title = strings.TrimSpace(feed.Title)
feed.SiteURL = strings.TrimSpace(feed.SiteURL)
@@ -80,7 +129,7 @@ func (feed *Feed) cleanup() {
for i, item := range feed.Items {
feed.Items[i].GUID = strings.TrimSpace(item.GUID)
feed.Items[i].URL = strings.TrimSpace(item.URL)
feed.Items[i].Title = strings.TrimSpace(item.Title)
feed.Items[i].Title = strings.TrimSpace(htmlutil.ExtractText(item.Title))
feed.Items[i].Content = strings.TrimSpace(item.Content)
if item.ImageURL != "" && strings.Contains(item.Content, item.ImageURL) {

View File

@@ -7,38 +7,40 @@ import (
)
func TestSniff(t *testing.T) {
testcases := [][2]string{
testcases := []struct{
input string
want feedProbe
}{
{
`<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"></rdf:RDF>`,
"rdf",
feedProbe{feedType: "rdf", callback: ParseRDF},
},
{
`<?xml version="1.0" encoding="ISO-8859-1"?><rss version="2.0"><channel></channel></rss>`,
"rss",
feedProbe{feedType: "rss", callback: ParseRSS, encoding: "iso-8859-1"},
},
{
`<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`,
"rss",
feedProbe{feedType: "rss", callback: ParseRSS},
},
{
`<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`,
"atom",
feedProbe{feedType: "atom", callback: ParseAtom, encoding: "utf-8"},
},
{
`{}`,
"json",
feedProbe{feedType: "json", callback: ParseJSON},
},
{
`<!DOCTYPE html><html><head><title></title></head><body></body></html>`,
"",
feedProbe{},
},
}
for _, testcase := range testcases {
have, _ := sniff(testcase[0])
want := testcase[1]
if want != have {
t.Log(testcase[0])
t.Errorf("Invalid format: want=%#v have=%#v", want, have)
want := testcase.want
have := sniff(testcase.input)
if want.encoding != have.encoding || want.feedType != have.feedType {
t.Errorf("Invalid output\n---\n%s\n---\n\nwant=%#v\nhave=%#v", testcase.input, want, have)
}
}
}
@@ -107,3 +109,44 @@ func TestParseFeedWithBOM(t *testing.T) {
t.FailNow()
}
}
func TestParseCleanIllegalCharsInUTF8(t *testing.T) {
data := `
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<item>
<title>` + "\a" + `title</title>
</item>
</channel>
</rss>
`
feed, err := Parse(strings.NewReader(data))
if err != nil {
t.Fatal(err)
}
if len(feed.Items) != 1 || feed.Items[0].Title != "title" {
t.Fatalf("invalid feed, got: %v", feed)
}
}
func TestParseCleanIllegalCharsInNonUTF8(t *testing.T) {
// echo привет | iconv -f utf8 -t cp1251 | hexdump -C
data := `
<?xml version="1.0" encoding="windows-1251"?>
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<item>
<title>` + "\a \xef\xf0\xe8\xe2\xe5\xf2\x0a \a" + `</title>
</item>
</channel>
</rss>
`
feed, err := Parse(strings.NewReader(data))
if err != nil {
t.Fatal(err)
}
if len(feed.Items) != 1 || feed.Items[0].Title != "привет" {
t.Fatalf("invalid feed, got: %v", feed)
}
}

View File

@@ -22,7 +22,7 @@ type rssFeed struct {
type rssItem struct {
GUID string `xml:"guid"`
Title string `xml:"title"`
Link string `xml:"link"`
Link string `xml:"rss link"`
Description string `xml:"rss description"`
PubDate string `xml:"pubDate"`
Enclosures []rssEnclosure `xml:"enclosure"`

View File

@@ -180,3 +180,26 @@ func TestRSSPodcastDuplicated(t *testing.T) {
t.Fatal("item.audio_url must be unset if present in the content")
}
}
func TestRSSTitleHTMLTags(t *testing.T) {
feed, _ := Parse(strings.NewReader(`
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<item>
<title>&lt;p&gt;title in p&lt;/p&gt;</title>
</item>
<item>
<title>very &lt;strong&gt;strong&lt;/strong&gt; title</title>
</item>
</channel>
</rss>
`))
have := []string{feed.Items[0].Title, feed.Items[1].Title}
want := []string{"title in p", "very strong title"}
for i := 0; i < len(want); i++ {
if want[i] != have[i] {
t.Errorf("title doesn't match\nwant: %#v\nhave: %#v\n", want[i], have[i])
}
}
}

View File

@@ -1,6 +1,8 @@
package parser
import (
"bufio"
"bytes"
"encoding/xml"
"io"
"regexp"
@@ -30,6 +32,81 @@ func plain2html(text string) string {
func xmlDecoder(r io.Reader) *xml.Decoder {
decoder := xml.NewDecoder(r)
decoder.Strict = false
decoder.CharsetReader = charset.NewReaderLabel
decoder.CharsetReader = func(cs string, input io.Reader) (io.Reader, error) {
r, err := charset.NewReaderLabel(cs, input)
if err == nil {
r = NewSafeXMLReader(r)
}
return r, err
}
return decoder
}
type safexmlreader struct {
reader *bufio.Reader
buffer *bytes.Buffer
}
func NewSafeXMLReader(r io.Reader) io.Reader {
return &safexmlreader{
reader: bufio.NewReader(r),
buffer: bytes.NewBuffer(make([]byte, 0, 4096)),
}
}
func (xr *safexmlreader) Read(p []byte) (int, error) {
for xr.buffer.Len() < cap(p) {
r, _, err := xr.reader.ReadRune()
if err == io.EOF {
if xr.buffer.Len() == 0 {
return 0, io.EOF
}
break
}
if err != nil {
return 0, err
}
if isInCharacterRange(r) {
xr.buffer.WriteRune(r)
}
}
return xr.buffer.Read(p)
}
// NOTE: copied from "encoding/xml" package
// Decide whether the given rune is in the XML Character Range, per
// the Char production of https://www.xml.com/axml/testaxml.htm,
// Section 2.2 Characters.
func isInCharacterRange(r rune) (inrange bool) {
return r == 0x09 ||
r == 0x0A ||
r == 0x0D ||
r >= 0x20 && r <= 0xD7FF ||
r >= 0xE000 && r <= 0xFFFD ||
r >= 0x10000 && r <= 0x10FFFF
}
// NOTE: copied from "encoding/xml" package
// procInst parses the `param="..."` or `param='...'`
// value out of the provided string, returning "" if not found.
func procInst(param, s string) string {
// TODO: this parsing is somewhat lame and not exact.
// It works for all actual cases, though.
param = param + "="
idx := strings.Index(s, param)
if idx == -1 {
return ""
}
v := s[idx+len(param):]
if v == "" {
return ""
}
if v[0] != '\'' && v[0] != '"' {
return ""
}
idx = strings.IndexRune(v[1:], rune(v[0]))
if idx == -1 {
return ""
}
return v[1 : idx+1]
}

src/parser/util_test.go (new file, 88 lines)
View File

@@ -0,0 +1,88 @@
package parser
import (
"bytes"
"io"
"reflect"
"testing"
)
func TestSafeXMLReader(t *testing.T) {
var f io.Reader
want := []byte("привет мир")
f = bytes.NewReader(want)
f = NewSafeXMLReader(f)
have, err := io.ReadAll(f)
if err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(want, have) {
t.Fatalf("invalid output\nwant: %v\nhave: %v", want, have)
}
}
func TestSafeXMLReaderRemoveUnwantedRunes(t *testing.T) {
var f io.Reader
input := []byte("\aпривет \x0cмир\ufffe\uffff")
want := []byte("привет мир")
f = bytes.NewReader(input)
f = NewSafeXMLReader(f)
have, err := io.ReadAll(f)
if err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(want, have) {
t.Fatalf("invalid output\nwant: %v\nhave: %v", want, have)
}
}
func TestSafeXMLReaderPartial1(t *testing.T) {
var f io.Reader
input := []byte("\aпривет \x0cмир\ufffe\uffff")
want := []byte("привет мир")
f = bytes.NewReader(input)
f = NewSafeXMLReader(f)
buf := make([]byte, 1)
for i := 0; i < len(want); i++ {
n, err := f.Read(buf)
if err != nil {
t.Fatal(err)
}
if n != 1 {
t.Fatalf("expected 1 byte, got %d", n)
}
if buf[0] != want[i] {
t.Fatalf("invalid char at pos %d\nwant: %v\nhave: %v", i, want[i], buf[0])
}
}
if x, err := f.Read(buf); err != io.EOF {
t.Fatalf("expected EOF, %v, %v %v", buf, x, err)
}
}
func TestSafeXMLReaderPartial2(t *testing.T) {
var f io.Reader
input := []byte("привет\a\a\a\a\a")
f = bytes.NewReader(input)
f = NewSafeXMLReader(f)
buf := make([]byte, 12)
n, err := f.Read(buf)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
if n != 12 {
t.Fatalf("expected 12 bytes")
}
n, err = f.Read(buf)
if n != 0 {
t.Fatalf("expected 0")
}
if err != io.EOF {
t.Fatalf("expected EOF, got %v", err)
}
}

View File

@@ -0,0 +1,14 @@
// +build !windows
package platform
// On non-windows platforms, we don't need to do anything. The console
// starts off attached already, if it exists.
func AttachConsole() error {
return nil
}
func FixConsoleIfNeeded() error {
return nil
}

View File

@@ -0,0 +1,131 @@
package platform
import (
"fmt"
"golang.org/x/sys/windows"
"os"
"syscall"
)
func AttachConsole() error {
const ATTACH_PARENT_PROCESS = ^uintptr(0)
proc := syscall.MustLoadDLL("kernel32.dll").MustFindProc("AttachConsole")
r1, _, err := proc.Call(ATTACH_PARENT_PROCESS)
if r1 == 0 {
errno, ok := err.(syscall.Errno)
if ok && errno == windows.ERROR_INVALID_HANDLE {
// console handle doesn't exist; not a real
// error, but the console handle will be
// invalid.
return nil
}
return err
} else {
return nil
}
}
var oldStdin, oldStdout, oldStderr *os.File
// Windows console output is a mess.
//
// If you compile as "-H windows", then if you launch your program without
// a console, Windows forcibly creates one to use as your stdin/stdout, which
// is silly for a GUI app, so we can't do that.
//
// If you compile as "-H windowsgui", then it doesn't create a console for
// your app... but also doesn't provide a working stdin/stdout/stderr even if
// you *did* launch from the console. However, you can use AttachConsole()
// to get a handle to your parent process's console, if any, and then
// os.NewFile() to turn that handle into a fd usable as stdout/stderr.
//
// However, then you have the problem that if you redirect stdout or stderr
// from the shell, you end up ignoring the redirection by forcing it to the
// console.
//
// To fix *that*, we have to detect whether there was a pre-existing stdout
// or not. We can check GetStdHandle(), which returns 0 for "should be
// console" and nonzero for "already pointing at a file."
//
// Be careful though! As soon as you run AttachConsole(), it resets *all*
// the GetStdHandle() handles to point them at the console instead, thus
// throwing away the original file redirects. So we have to GetStdHandle()
// *before* AttachConsole().
//
// For some reason, powershell redirections provide a valid file handle, but
// writing to that handle doesn't write to the file. I haven't found a way
// to work around that. (Windows 10.0.17763.379)
//
// Net result is as follows.
// Before:
// SHELL NON-REDIRECTED REDIRECTED
// explorer.exe no console n/a
// cmd.exe broken works
// powershell broken broken
// WSL bash broken works
// After
// SHELL NON-REDIRECTED REDIRECTED
// explorer.exe no console n/a
// cmd.exe works works
// powershell works broken
// WSL bash works works
//
// We don't seem to make anything worse, at least.
func FixConsoleIfNeeded() error {
// Retain the original console objects, to prevent Go from automatically
// closing their file descriptors when they get garbage collected.
// You never want to close file descriptors 0, 1, and 2.
oldStdin, oldStdout, oldStderr = os.Stdin, os.Stdout, os.Stderr
stdin, _ := syscall.GetStdHandle(syscall.STD_INPUT_HANDLE)
stdout, _ := syscall.GetStdHandle(syscall.STD_OUTPUT_HANDLE)
stderr, _ := syscall.GetStdHandle(syscall.STD_ERROR_HANDLE)
var invalid syscall.Handle
con := invalid
if stdin == invalid || stdout == invalid || stderr == invalid {
err := AttachConsole()
if err != nil {
return fmt.Errorf("attachconsole: %v", err)
}
if stdin == invalid {
stdin, _ = syscall.GetStdHandle(syscall.STD_INPUT_HANDLE)
}
if stdout == invalid {
stdout, _ = syscall.GetStdHandle(syscall.STD_OUTPUT_HANDLE)
con = stdout
}
if stderr == invalid {
stderr, _ = syscall.GetStdHandle(syscall.STD_ERROR_HANDLE)
con = stderr
}
}
if con != invalid {
// Make sure the console is configured to convert
// \n to \r\n, like Go programs expect.
h := windows.Handle(con)
var st uint32
err := windows.GetConsoleMode(h, &st)
if err != nil {
return fmt.Errorf("GetConsoleMode: %v", err)
}
err = windows.SetConsoleMode(h, st&^windows.DISABLE_NEWLINE_AUTO_RETURN)
if err != nil {
return fmt.Errorf("SetConsoleMode: %v", err)
}
}
if stdin != invalid {
os.Stdin = os.NewFile(uintptr(stdin), "stdin")
}
if stdout != invalid {
os.Stdout = os.NewFile(uintptr(stdout), "stdout")
}
if stderr != invalid {
os.Stderr = os.NewFile(uintptr(stderr), "stderr")
}
return nil
}

View File

@@ -3,6 +3,8 @@ package opml
import (
"encoding/xml"
"io"
"golang.org/x/net/html/charset"
)
type opml struct {
@@ -45,6 +47,7 @@ func Parse(r io.Reader) (Folder, error) {
decoder := xml.NewDecoder(r)
decoder.Entity = xml.HTMLEntity
decoder.Strict = false
decoder.CharsetReader = charset.NewReaderLabel
err := decoder.Decode(&val)
if err != nil {

View File

@@ -1,6 +1,7 @@
package opml
import (
"os"
"reflect"
"strings"
"testing"
@@ -87,3 +88,41 @@ func TestParseFallback(t *testing.T) {
t.Fatal("invalid opml")
}
}
func TestParseWithEncoding(t *testing.T) {
file, err := os.Open("sample_win1251.xml")
if err != nil {
t.Fatal(err)
}
have, err := Parse(file)
if err != nil {
t.Fatal(err)
}
want := Folder{
Title: "",
Feeds: []Feed{
{
Title: "пример1",
FeedUrl: "https://baz.com/feed.xml",
SiteUrl: "https://baz.com/",
},
},
Folders: []Folder{
{
Title: "папка",
Feeds: []Feed{
{
Title: "пример2",
FeedUrl: "https://foo.com/feed.xml",
SiteUrl: "https://foo.com/",
},
},
},
},
}
if !reflect.DeepEqual(want, have) {
t.Logf("want: %#v", want)
t.Logf("have: %#v", have)
t.Fatal("invalid opml")
}
}

View File

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="windows-1251"?>
<opml version="1.1">
<head><title><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD></title></head>
<body>
<outline text="<22><><EFBFBD><EFBFBD><EFBFBD>">
<outline type="rss" text="<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>2" description="<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>2" xmlUrl="https://foo.com/feed.xml" htmlUrl="https://foo.com/"/>
</outline>
<outline type="rss" text="<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>1" description="<22><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>1" xmlUrl="https://baz.com/feed.xml" htmlUrl="https://baz.com/"/>
</body>
</opml>

View File

@@ -219,7 +219,11 @@ func (s *Server) handleFeedList(c *router.Context) {
result.FeedLink,
form.FolderID,
)
s.db.CreateItems(worker.ConvertItems(result.Feed.Items, *feed))
items := worker.ConvertItems(result.Feed.Items, *feed)
if len(items) > 0 {
s.db.CreateItems(items)
s.db.SetFeedSize(feed.Id, len(items))
}
s.worker.FindFeedFavicon(*feed)
c.JSON(http.StatusOK, map[string]interface{}{
@@ -457,14 +461,13 @@ func (s *Server) handlePageCrawl(c *router.Context) {
return
}
res, err := http.Get(url)
body, err := worker.GetBody(url)
if err != nil {
log.Print(err)
c.Out.WriteHeader(http.StatusBadRequest)
return
}
defer res.Body.Close()
content, err := readability.ExtractContent(res.Body)
content, err := readability.ExtractContent(strings.NewReader(body))
if err != nil {
log.Print(err)
c.Out.WriteHeader(http.StatusNoContent)

View File

@@ -194,3 +194,15 @@ func (s *Storage) GetFeedErrors() map[int64]string {
}
return errors
}
func (s *Storage) SetFeedSize(feedId int64, size int) {
_, err := s.db.Exec(`
insert into feed_sizes (feed_id, size)
values (?, ?)
on conflict (feed_id) do update set size = excluded.size`,
feedId, size,
)
if err != nil {
log.Print(err)
}
}

View File

@@ -61,7 +61,7 @@ type ItemFilter struct {
FeedID *int64
Status *ItemStatus
Search *string
After *int64
After *int64
}
type MarkFilter struct {
@@ -292,45 +292,70 @@ func (s *Storage) SyncSearch() {
}
}
var (
itemsKeepSize = 50
itemsKeepDays = 90
)
// Delete old articles from the database to cleanup space.
//
// The rules:
// * Never delete starred entries.
// * Keep at least the same amount of articles the feed provides (default: 50).
// This prevents from deleting items for rarely updated and/or ever-growing
// feeds which might eventually reappear as unread.
// * Keep entries for a certain period (default: 90 days).
func (s *Storage) DeleteOldItems() {
rows, err := s.db.Query(fmt.Sprintf(`
select feed_id, count(*) as num_items
from items
where status != %d
group by feed_id
having num_items > 50
`, STARRED))
rows, err := s.db.Query(`
select
i.feed_id,
max(coalesce(s.size, 0), ?) as max_items,
count(*) as num_items
from items i
left outer join feed_sizes s on s.feed_id = i.feed_id
where status != ?
group by i.feed_id
`, itemsKeepSize, STARRED)
if err != nil {
log.Print(err)
return
}
feedIds := make([]int64, 0)
feedLimits := make(map[int64]int64, 0)
for rows.Next() {
var id int64
rows.Scan(&id, nil)
feedIds = append(feedIds, id)
var feedId, limit int64
rows.Scan(&feedId, &limit, nil)
feedLimits[feedId] = limit
}
for _, feedId := range feedIds {
for feedId, limit := range feedLimits {
result, err := s.db.Exec(`
delete from items where feed_id = ? and status != ? and date_arrived < ?`,
delete from items
where id in (
select i.id
from items i
where i.feed_id = ? and status != ?
order by date desc
limit -1 offset ?
) and date_arrived < ?
`,
feedId,
STARRED,
time.Now().Add(-time.Hour*24*90), // 90 days
limit,
time.Now().Add(-time.Hour*time.Duration(24*itemsKeepDays)),
)
if err != nil {
log.Print(err)
return
}
num, err := result.RowsAffected()
numDeleted, err := result.RowsAffected()
if err != nil {
log.Print(err)
return
}
if num > 0 {
log.Printf("Deleted %d old items (%d)", num, feedId)
if numDeleted > 0 {
log.Printf("Deleted %d old items (feed: %d)", numDeleted, feedId)
}
}
}
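
One non-obvious piece of the rewritten query above is SQLite's "limit -1 offset ?" clause: limit -1 means "no limit", so the subquery matches every row except the N newest, and the outer date_arrived condition then spares anything newer than the retention window. A standalone sketch of the same idiom (throwaway table and the go-sqlite3 driver assumed, not the project's schema):

package main

import (
	"database/sql"
	"fmt"

	_ "github.com/mattn/go-sqlite3"
)

func main() {
	db, err := sql.Open("sqlite3", ":memory:")
	if err != nil {
		panic(err)
	}
	defer db.Close()

	db.Exec(`create table items (id integer primary key, date integer)`)
	for i := 1; i <= 10; i++ {
		db.Exec(`insert into items (date) values (?)`, i)
	}

	// Keep the 3 newest rows; the subquery selects every row except
	// the first 3 ordered by date descending.
	keep := 3
	res, err := db.Exec(`
		delete from items where id in (
			select id from items order by date desc limit -1 offset ?
		)`, keep)
	if err != nil {
		panic(err)
	}
	n, _ := res.RowsAffected()
	fmt.Println("deleted:", n) // deleted: 7
}
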

View File

@@ -3,6 +3,7 @@ package storage
import (
"log"
"reflect"
"strconv"
"testing"
"time"
)
@@ -45,14 +46,14 @@ func testItemsSetup(db *Storage) testItemScope {
db.CreateItems([]Item{
// feed11
{GUID: "item111", FeedId: feed11.Id, Title: "title111", Date: now.Add(time.Hour * 24 * 1)},
{GUID: "item112", FeedId: feed11.Id, Title: "title112", Date: now.Add(time.Hour * 24 * 2)}, // read
{GUID: "item113", FeedId: feed11.Id, Title: "title113", Date: now.Add(time.Hour * 24 * 3)}, // starred
{GUID: "item112", FeedId: feed11.Id, Title: "title112", Date: now.Add(time.Hour * 24 * 2)}, // read
{GUID: "item113", FeedId: feed11.Id, Title: "title113", Date: now.Add(time.Hour * 24 * 3)}, // starred
// feed12
{GUID: "item121", FeedId: feed12.Id, Title: "title121", Date: now.Add(time.Hour * 24 * 4)},
{GUID: "item122", FeedId: feed12.Id, Title: "title122", Date: now.Add(time.Hour * 24 * 5)}, // read
{GUID: "item122", FeedId: feed12.Id, Title: "title122", Date: now.Add(time.Hour * 24 * 5)}, // read
// feed21
{GUID: "item211", FeedId: feed21.Id, Title: "title211", Date: now.Add(time.Hour * 24 * 6)}, // read
{GUID: "item212", FeedId: feed21.Id, Title: "title212", Date: now.Add(time.Hour * 24 * 7)}, // starred
{GUID: "item211", FeedId: feed21.Id, Title: "title211", Date: now.Add(time.Hour * 24 * 6)}, // read
{GUID: "item212", FeedId: feed21.Id, Title: "title212", Date: now.Add(time.Hour * 24 * 7)}, // starred
// feed01
{GUID: "item011", FeedId: feed01.Id, Title: "title011", Date: now.Add(time.Hour * 24 * 8)},
{GUID: "item012", FeedId: feed01.Id, Title: "title012", Date: now.Add(time.Hour * 24 * 9)}, // read
@@ -271,3 +272,59 @@ func TestMarkItemsRead(t *testing.T) {
t.Fail()
}
}
func TestDeleteOldItems(t *testing.T) {
extraItems := 10
now := time.Now()
db := testDB()
feed := db.CreateFeed("feed", "", "", "http://test.com/feed11.xml", nil)
items := make([]Item, 0)
for i := 0; i < itemsKeepSize+extraItems; i++ {
istr := strconv.Itoa(i)
items = append(items, Item{
GUID: istr,
FeedId: feed.Id,
Title: istr,
Date: now.Add(time.Hour * time.Duration(i)),
})
}
db.CreateItems(items)
db.SetFeedSize(feed.Id, itemsKeepSize)
var feedSize int
err := db.db.QueryRow(
`select size from feed_sizes where feed_id = ?`, feed.Id,
).Scan(&feedSize)
if err != nil {
t.Fatal(err)
}
if feedSize != itemsKeepSize {
t.Fatalf(
"expected feed size to get updated\nwant: %d\nhave: %d",
itemsKeepSize+extraItems,
feedSize,
)
}
// expire only the first 3 articles
_, err = db.db.Exec(
`update items set date_arrived = ?
where id in (select id from items limit 3)`,
now.Add(-time.Hour*time.Duration(itemsKeepDays*24)),
)
if err != nil {
t.Fatal(err)
}
db.DeleteOldItems()
feedItems := db.ListItems(ItemFilter{FeedID: &feed.Id}, 1000, false)
if len(feedItems) != len(items)-3 {
t.Fatalf(
"invalid number of old items kept\nwant: %d\nhave: %d",
len(items)-3,
len(feedItems),
)
}
}

View File

@@ -13,6 +13,7 @@ var migrations = []func(*sql.Tx) error{
m04_item_podcasturl,
m05_move_description_to_content,
m06_fill_missing_dates,
m07_add_feed_size,
}
var maxVersion = int64(len(migrations))
@@ -259,3 +260,14 @@ func m06_fill_missing_dates(tx *sql.Tx) error {
_, err := tx.Exec(sql)
return err
}
func m07_add_feed_size(tx *sql.Tx) error {
sql := `
create table if not exists feed_sizes (
feed_id references feeds(id) on delete cascade unique,
size integer not null default 0
);
`
_, err := tx.Exec(sql)
return err
}

View File

@@ -11,6 +11,7 @@ func testDB() *Storage {
log.SetOutput(io.Discard)
db, _ := New(":memory:")
log.SetOutput(os.Stderr)
log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)
return db
}

View File

@@ -9,3 +9,4 @@ hash:
changes:
-removed `getlantern/golog` dependency
-prevent from compiling in linux

View File

@@ -1,3 +1,5 @@
// +build darwin windows
/*
Package systray is a cross-platform Go library to place an icon and menu in the notification area.
*/

View File

@@ -1,3 +1,5 @@
// +build never
package systray
/*

View File

@@ -1,9 +1,8 @@
// +build !windows
// +build darwin
package systray
/*
#cgo linux pkg-config: gtk+-3.0 appindicator3-0.1
#cgo darwin CFLAGS: -DDARWIN -x objective-c -fobjc-arc
#cgo darwin LDFLAGS: -framework Cocoa

View File

@@ -6,10 +6,10 @@ import (
"fmt"
"io"
"io/ioutil"
"mime"
"net/http"
"net/url"
"strings"
"time"
"github.com/nkanaev/yarr/src/content/scraper"
"github.com/nkanaev/yarr/src/parser"
@@ -39,29 +39,32 @@ func DiscoverFeed(candidateUrl string) (*DiscoverResult, error) {
if res.StatusCode != 200 {
return nil, fmt.Errorf("status code %d", res.StatusCode)
}
cs := getCharset(res)
body, err := httpBody(res)
if err != nil {
return nil, err
}
content, err := ioutil.ReadAll(body)
body, err := io.ReadAll(res.Body)
if err != nil {
return nil, err
}
// Try to feed into parser
feed, err := parser.Parse(bytes.NewReader(content))
feed, err := parser.ParseAndFix(bytes.NewReader(body), candidateUrl, cs)
if err == nil {
feed.TranslateURLs(candidateUrl)
feed.SetMissingDatesTo(time.Now())
result.Feed = feed
result.FeedLink = candidateUrl
return result, nil
}
// Possibly an html link. Search for feed links
content := string(body)
if cs != "" {
if r, err := charset.NewReaderLabel(cs, bytes.NewReader(body)); err == nil {
if body, err := io.ReadAll(r); err == nil {
content = string(body)
}
}
}
sources := make([]FeedSource, 0)
for url, title := range scraper.FindFeeds(string(content), candidateUrl) {
for url, title := range scraper.FindFeeds(content, candidateUrl) {
sources = append(sources, FeedSource{Title: title, Url: url})
}
switch {
@@ -187,12 +190,7 @@ func listItems(f storage.Feed, db *storage.Storage) ([]storage.Item, error) {
return nil, nil
}
body, err := httpBody(res)
if err != nil {
return nil, err
}
feed, err := parser.Parse(body)
feed, err := parser.ParseAndFix(res.Body, f.FeedLink, getCharset(res))
if err != nil {
return nil, err
}
@@ -202,15 +200,42 @@ func listItems(f storage.Feed, db *storage.Storage) ([]storage.Item, error) {
if lmod != "" || etag != "" {
db.SetHTTPState(f.Id, lmod, etag)
}
feed.TranslateURLs(f.FeedLink)
feed.SetMissingDatesTo(time.Now())
return ConvertItems(feed.Items, f), nil
}
func httpBody(res *http.Response) (io.Reader, error) {
func getCharset(res *http.Response) string {
contentType := res.Header.Get("Content-Type")
if _, params, err := mime.ParseMediaType(contentType); err == nil {
if cs, ok := params["charset"]; ok {
if e, _ := charset.Lookup(cs); e != nil {
return cs
}
}
}
return ""
}
func GetBody(url string) (string, error) {
res, err := client.get(url)
if err != nil {
return "", err
}
defer res.Body.Close()
var r io.Reader
ctype := res.Header.Get("Content-Type")
if strings.Contains(ctype, "charset") {
return charset.NewReader(res.Body, ctype)
r, err = charset.NewReader(res.Body, ctype)
if err != nil {
return "", err
}
} else {
r = res.Body
}
return res.Body, nil
body, err := io.ReadAll(r)
if err != nil {
return "", err
}
return string(body), nil
}

View File

@@ -121,7 +121,11 @@ func (w *Worker) refresher(feeds []storage.Feed) {
srcqueue <- feed
}
for i := 0; i < len(feeds); i++ {
w.db.CreateItems(<-dstqueue)
items := <-dstqueue
if len(items) > 0 {
w.db.CreateItems(items)
w.db.SetFeedSize(items[0].FeedId, len(items))
}
atomic.AddInt32(w.pending, -1)
w.db.SyncSearch()
}