32 Commits
v2.2 ... v2.3

Author SHA1 Message Date
Nazar Kanaev
d2c034a850 v2.3 2022-05-03 20:40:39 +01:00
Nazar Kanaev
713930decc update changelog 2022-05-03 15:45:22 +01:00
Nazar Kanaev
ee2a825cf0 get rss link when atom link is present
found in: https://rss.nytimes.com/services/xml/rss/nyt/Arts.xml

when both rss and atom link elements are present, xml parser returns
empty string. provide default namespace to capture rss link properly.
2022-05-03 15:35:57 +01:00
Nazar Kanaev
8e9da86f83 nah 2022-04-09 16:06:19 +01:00
Nazar Kanaev
9eb49fd3a7 credits 2022-04-09 16:03:46 +01:00
Nazar Kanaev
684bc25b83 fix: load more items to prevent scroll lock 2022-04-09 15:58:33 +01:00
nkanaev
8ceab03cd7 fix text color in dark mode 2022-03-01 10:43:25 +00:00
Nazar Kanaev
34dad4ac8f systray: fix build flag 2022-02-16 14:04:04 +00:00
Nazar Kanaev
b40d930f8a credits 2022-02-15 22:12:32 +00:00
Nazar Kanaev
d4b34e900e update test 2022-02-15 22:04:16 +00:00
Nazar Kanaev
954b549029 update 2022-02-15 21:56:32 +00:00
Nazar Kanaev
fbd0b2310e update changelog 2022-02-14 20:33:28 +00:00
Nazar Kanaev
be7af0ccaf handle invalid chars in non-utf8 xml 2022-02-14 15:23:55 +00:00
Nazar Kanaev
18221ef12d use bytes.Buffer instead 2022-02-14 11:05:38 +00:00
Nazar Kanaev
4c0726412b do not build systray in linux 2022-02-14 00:56:03 +00:00
Nazar Kanaev
d7253a60b8 strip out invalid xml characters 2022-02-12 23:42:44 +00:00
Nazar Kanaev
2de3ddff08 fix test 2022-02-12 23:41:01 +00:00
Nazar Kanaev
830248b6ae store feed size 2022-02-10 22:14:47 +00:00
Nazar Kanaev
f8db2ef7ad delete old items based on feed size 2022-02-10 22:14:47 +00:00
Nazar Kanaev
109caaa889 cascade 2022-02-10 22:14:47 +00:00
Nazar Kanaev
d0b83babd2 initial work for smarter database cleanup 2022-02-10 22:14:47 +00:00
nkanaev
de3decbffd remove unused assets 2022-01-26 10:44:33 +00:00
nkanaev
c92229a698 update changelog 2022-01-24 16:56:55 +00:00
nkanaev
176852b662 credits 2022-01-24 16:52:29 +00:00
nkanaev
52cc8ecbbd fix encoding 2022-01-24 16:47:32 +00:00
nkanaev
e3e9542f1e fix page crawling encoding 2022-01-24 14:02:21 +00:00
nkanaev
b78c8bf8bf fix parsing opml with encoding 2022-01-24 13:10:30 +00:00
nkanaev
bff7476b58 refactoring 2022-01-24 12:50:52 +00:00
Nazar Kanaev
05f5785660 update promo.png 2022-01-18 14:36:05 +00:00
David Adi Nugroho
cb50aed89a add placeholder and autofocus to new feed url 2021-12-28 17:56:26 +00:00
Nazar Kanaev
df655aca5e remove todo 2021-11-20 22:42:33 +00:00
Nazar Kanaev
86853a87bf update changelog 2021-11-20 22:07:33 +00:00
35 changed files with 575 additions and 93 deletions

View File

@@ -1,5 +1,14 @@
 # upcoming
+- (fix) handling encodings (thanks to @f100024 & @fserb)
+- (fix) parsing xml feeds with illegal characters (thanks to @stepelu for the report)
+- (fix) old articles reappearing as unread (thanks to @adaszko for the report)
+- (fix) item list scrolling issue on large screens (thanks to @bielej for the report)
+- (fix) keyboard shortcuts color in dark mode (thanks to @John09f9 for the report)
+- (etc) autofocus when adding a new feed (thanks to @lakuapik)
+
+# v2.2 (2021-11-20)
 - (fix) windows console support (thanks to @dufferzafar for the report)
 - (fix) remove html tags from article titles (thanks to Alex Went for the report)
 - (etc) autoselect current folder when adding a new feed (thanks to @krkk)

View File

@@ -1 +0,0 @@
-- feedlist keyboard navigation is flaky in "unread" section

Binary file not shown. (Before: 430 KiB, After: 223 KiB)

View File

@@ -1,4 +1,4 @@
-VERSION=2.2
+VERSION=2.3
 GITHASH=$(shell git rev-parse --short=8 HEAD)
 CGO_ENABLED=1

View File

@@ -6,6 +6,7 @@ import (
 	"io"
 	"io/fs"
 	"io/ioutil"
+	"log"
 	"os"
 )
@@ -29,9 +30,18 @@ func Template(path string) *template.Template {
 	if !found {
 		tmpl = template.Must(template.New(path).Delims("{%", "%}").Funcs(template.FuncMap{
 			"inline": func(svg string) template.HTML {
-				svgfile, _ := FS.Open("graphicarts/" + svg)
-				content, _ := ioutil.ReadAll(svgfile)
-				svgfile.Close()
+				svgfile, err := FS.Open("graphicarts/" + svg)
+				// should never happen
+				if err != nil {
+					log.Fatal(err)
+				}
+				defer svgfile.Close()
+				content, err := ioutil.ReadAll(svgfile)
+				// should never happen
+				if err != nil {
+					log.Fatal(err)
+				}
 				return template.HTML(content)
 			},
 		}).ParseFS(FS, path))

View File

@@ -1 +0,0 @@
-<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-chevron-down"><polyline points="6 9 12 15 18 9"></polyline></svg>


View File

@@ -1 +0,0 @@
-<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-list"><line x1="8" y1="6" x2="21" y2="6"></line><line x1="8" y1="12" x2="21" y2="12"></line><line x1="8" y1="18" x2="21" y2="18"></line><line x1="3" y1="6" x2="3.01" y2="6"></line><line x1="3" y1="12" x2="3.01" y2="12"></line><line x1="3" y1="18" x2="3.01" y2="18"></line></svg>


View File

@@ -1 +0,0 @@
-<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-menu"><line x1="3" y1="12" x2="21" y2="12"></line><line x1="3" y1="6" x2="21" y2="6"></line><line x1="3" y1="18" x2="21" y2="18"></line></svg>


View File

@@ -1 +0,0 @@
-<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-more-vertical"><circle cx="12" cy="12" r="1"></circle><circle cx="12" cy="5" r="1"></circle><circle cx="12" cy="19" r="1"></circle></svg>


View File

@@ -1 +0,0 @@
-<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-settings"><circle cx="12" cy="12" r="3"></circle><path d="M19.4 15a1.65 1.65 0 0 0 .33 1.82l.06.06a2 2 0 0 1 0 2.83 2 2 0 0 1-2.83 0l-.06-.06a1.65 1.65 0 0 0-1.82-.33 1.65 1.65 0 0 0-1 1.51V21a2 2 0 0 1-2 2 2 2 0 0 1-2-2v-.09A1.65 1.65 0 0 0 9 19.4a1.65 1.65 0 0 0-1.82.33l-.06.06a2 2 0 0 1-2.83 0 2 2 0 0 1 0-2.83l.06-.06a1.65 1.65 0 0 0 .33-1.82 1.65 1.65 0 0 0-1.51-1H3a2 2 0 0 1-2-2 2 2 0 0 1 2-2h.09A1.65 1.65 0 0 0 4.6 9a1.65 1.65 0 0 0-.33-1.82l-.06-.06a2 2 0 0 1 0-2.83 2 2 0 0 1 2.83 0l.06.06a1.65 1.65 0 0 0 1.82.33H9a1.65 1.65 0 0 0 1-1.51V3a2 2 0 0 1 2-2 2 2 0 0 1 2 2v.09a1.65 1.65 0 0 0 1 1.51 1.65 1.65 0 0 0 1.82-.33l.06-.06a2 2 0 0 1 2.83 0 2 2 0 0 1 0 2.83l-.06.06a1.65 1.65 0 0 0-.33 1.82V9a1.65 1.65 0 0 0 1.51 1H21a2 2 0 0 1 2 2 2 2 0 0 1-2 2h-.09a1.65 1.65 0 0 0-1.51 1z"></path></svg>


View File

@@ -1 +0,0 @@
-<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="feather feather-trash-2"><polyline points="3 6 5 6 21 6"></polyline><path d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6m3 0V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2"></path><line x1="10" y1="11" x2="10" y2="17"></line><line x1="14" y1="11" x2="14" y2="17"></line></svg>


View File

@@ -354,7 +354,7 @@
 				<p class="cursor-default"><b>New Feed</b></p>
 				<form action="" @submit.prevent="createFeed(event)" class="mt-4">
 					<label for="feed-url">URL</label>
-					<input id="feed-url" name="url" type="url" class="form-control" required autocomplete="off" :readonly="feedNewChoice.length > 0">
+					<input id="feed-url" name="url" type="url" class="form-control" required autocomplete="off" :readonly="feedNewChoice.length > 0" placeholder="https://example.com/feed" v-focus>
 					<label for="feed-folder" class="mt-3 d-block">
 						Folder
 						<a href="#" class="float-right text-decoration-none" @click.prevent="createNewFeedFolder()">new folder</a>

View File

@@ -21,6 +21,12 @@ Vue.directive('scroll', {
 	},
 })
 
+Vue.directive('focus', {
+	inserted: function(el) {
+		el.focus()
+	}
+})
+
 Vue.component('drag', {
 	props: ['width'],
 	template: '<div class="drag"></div>',
@@ -413,7 +419,7 @@ var vm = new Vue({
 		}
 		this.loading.items = true
-		return api.items.list(query).then(function(data) {
+		api.items.list(query).then(function(data) {
 			if (loadMore) {
 				vm.items = vm.items.concat(data.list)
 			} else {
@@ -421,14 +427,24 @@ var vm = new Vue({
 			}
 			vm.itemsHasMore = data.has_more
 			vm.loading.items = false
+			// load more if there's some space left at the bottom of the item list.
+			vm.$nextTick(function() {
+				if (vm.itemsHasMore && !vm.loading.items && vm.itemListCloseToBottom()) {
+					vm.refreshItems(true)
+				}
+			})
 		})
 	},
+	itemListCloseToBottom: function() {
+		var el = this.$refs.itemlist
+		var closeToBottom = (el.scrollHeight - el.scrollTop - el.offsetHeight) < 50
+		return closeToBottom
+	},
 	loadMoreItems: function(event, el) {
 		if (!this.itemsHasMore) return
 		if (this.loading.items) return
-		var closeToBottom = (el.scrollHeight - el.scrollTop - el.offsetHeight) < 50
-		if (closeToBottom) this.refreshItems(true)
+		if (this.itemListCloseToBottom()) this.refreshItems(true)
 	},
 	markItemsRead: function() {
 		var query = this.getItemsQuery()

View File

@@ -85,6 +85,10 @@ select.form-control:not([multiple]):not([size]) {
 	outline: none;
 }
 
+.table-compact {
+	color: unset !important;
+}
+
 .table-compact tr td:first-child {
 	padding-left: 0;
 }

View File

@@ -11,18 +11,23 @@ import (
 	"time"
 
 	"github.com/nkanaev/yarr/src/content/htmlutil"
+	"golang.org/x/net/html/charset"
 )
 
 var UnknownFormat = errors.New("unknown feed format")
 
-type processor func(r io.Reader) (*Feed, error)
+type feedProbe struct {
+	feedType string
+	callback func(r io.Reader) (*Feed, error)
+	encoding string
+}
 
-func sniff(lookup string) (string, processor) {
+func sniff(lookup string) (out feedProbe) {
 	lookup = strings.TrimSpace(lookup)
 	lookup = strings.TrimLeft(lookup, "\x00\xEF\xBB\xBF\xFE\xFF")
 
-	if len(lookup) < 0 {
-		return "", nil
+	if len(lookup) == 0 {
+		return
 	}
 
 	switch lookup[0] {
@@ -33,24 +38,42 @@ func sniff(lookup string) (string, processor) {
 			if token == nil {
 				break
 			}
+			// check <?xml encoding="ENCODING" ?>
+			if el, ok := token.(xml.ProcInst); ok && el.Target == "xml" {
+				out.encoding = strings.ToLower(procInst("encoding", string(el.Inst)))
+			}
 			if el, ok := token.(xml.StartElement); ok {
 				switch el.Name.Local {
 				case "rss":
-					return "rss", ParseRSS
+					out.feedType = "rss"
+					out.callback = ParseRSS
+					return
 				case "RDF":
-					return "rdf", ParseRDF
+					out.feedType = "rdf"
+					out.callback = ParseRDF
+					return
 				case "feed":
-					return "atom", ParseAtom
+					out.feedType = "atom"
+					out.callback = ParseAtom
+					return
 				}
 			}
 		}
 	case '{':
-		return "json", ParseJSON
+		out.feedType = "json"
+		out.callback = ParseJSON
+		return
 	}
-	return "", nil
+	return
 }
 
 func Parse(r io.Reader) (*Feed, error) {
+	return ParseWithEncoding(r, "")
+}
+
+func ParseWithEncoding(r io.Reader, fallbackEncoding string) (*Feed, error) {
 	lookup := make([]byte, 2048)
 	n, err := io.ReadFull(r, lookup)
 	switch {
@@ -63,18 +86,42 @@ func Parse(r io.Reader) (*Feed, error) {
 		r = io.MultiReader(bytes.NewReader(lookup), r)
 	}
 
-	_, callback := sniff(string(lookup))
-	if callback == nil {
+	out := sniff(string(lookup))
+	if out.feedType == "" {
 		return nil, UnknownFormat
 	}
 
-	feed, err := callback(r)
+	if out.encoding == "" && fallbackEncoding != "" {
+		r, err = charset.NewReaderLabel(fallbackEncoding, r)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	if (out.feedType != "json") && (out.encoding == "" || out.encoding == "utf-8") {
+		// XML decoder will not rely on custom CharsetReader (see `xmlDecoder`)
+		// to handle invalid xml characters.
+		// Assume input is already UTF-8 and do the cleanup here.
+		r = NewSafeXMLReader(r)
+	}
+
+	feed, err := out.callback(r)
 	if feed != nil {
 		feed.cleanup()
 	}
 	return feed, err
 }
 
+func ParseAndFix(r io.Reader, baseURL, fallbackEncoding string) (*Feed, error) {
+	feed, err := ParseWithEncoding(r, fallbackEncoding)
+	if err != nil {
+		return nil, err
+	}
+	feed.TranslateURLs(baseURL)
+	feed.SetMissingDatesTo(time.Now())
+	return feed, nil
+}
+
 func (feed *Feed) cleanup() {
 	feed.Title = strings.TrimSpace(feed.Title)
 	feed.SiteURL = strings.TrimSpace(feed.SiteURL)

View File

@@ -7,38 +7,40 @@ import (
 )
 
 func TestSniff(t *testing.T) {
-	testcases := [][2]string{
+	testcases := []struct{
+		input string
+		want  feedProbe
+	}{
 		{
 			`<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"></rdf:RDF>`,
-			"rdf",
+			feedProbe{feedType: "rdf", callback: ParseRDF},
 		},
 		{
 			`<?xml version="1.0" encoding="ISO-8859-1"?><rss version="2.0"><channel></channel></rss>`,
-			"rss",
+			feedProbe{feedType: "rss", callback: ParseRSS, encoding: "iso-8859-1"},
 		},
 		{
 			`<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`,
-			"rss",
+			feedProbe{feedType: "rss", callback: ParseRSS},
 		},
 		{
 			`<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`,
-			"atom",
+			feedProbe{feedType: "atom", callback: ParseAtom, encoding: "utf-8"},
 		},
 		{
 			`{}`,
-			"json",
+			feedProbe{feedType: "json", callback: ParseJSON},
 		},
 		{
 			`<!DOCTYPE html><html><head><title></title></head><body></body></html>`,
-			"",
+			feedProbe{},
 		},
 	}
 	for _, testcase := range testcases {
-		have, _ := sniff(testcase[0])
-		want := testcase[1]
-		if want != have {
-			t.Log(testcase[0])
-			t.Errorf("Invalid format: want=%#v have=%#v", want, have)
+		want := testcase.want
+		have := sniff(testcase.input)
+		if want.encoding != have.encoding || want.feedType != have.feedType {
+			t.Errorf("Invalid output\n---\n%s\n---\n\nwant=%#v\nhave=%#v", testcase.input, want, have)
 		}
 	}
 }
@@ -107,3 +109,44 @@ func TestParseFeedWithBOM(t *testing.T) {
 		t.FailNow()
 	}
 }
+
+func TestParseCleanIllegalCharsInUTF8(t *testing.T) {
+	data := `
+	<?xml version="1.0" encoding="UTF-8"?>
+	<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
+	<channel>
+	<item>
+	<title>` + "\a" + `title</title>
+	</item>
+	</channel>
+	</rss>
+	`
+	feed, err := Parse(strings.NewReader(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(feed.Items) != 1 || feed.Items[0].Title != "title" {
+		t.Fatalf("invalid feed, got: %v", feed)
+	}
+}
+
+func TestParseCleanIllegalCharsInNonUTF8(t *testing.T) {
+	// echo привет | iconv -f utf8 -t cp1251 | hexdump -C
+	data := `
+	<?xml version="1.0" encoding="windows-1251"?>
+	<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
+	<channel>
+	<item>
+	<title>` + "\a \xef\xf0\xe8\xe2\xe5\xf2\x0a \a" + `</title>
+	</item>
+	</channel>
+	</rss>
+	`
+	feed, err := Parse(strings.NewReader(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(feed.Items) != 1 || feed.Items[0].Title != "привет" {
+		t.Fatalf("invalid feed, got: %v", feed)
+	}
+}

View File

@@ -22,7 +22,7 @@ type rssFeed struct {
 type rssItem struct {
 	GUID        string `xml:"guid"`
 	Title       string `xml:"title"`
-	Link        string `xml:"link"`
+	Link        string `xml:"rss link"`
 	Description string `xml:"rss description"`
 	PubDate     string `xml:"pubDate"`
 	Enclosures  []rssEnclosure `xml:"enclosure"`
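
This is the fix described in the "get rss link when atom link is present" commit above. With the old `xml:"link"` tag, Go's encoding/xml matches both the plain RSS `<link>` element and a namespaced, self-closing `<atom:link/>`; the Atom element carries its URL in an attribute, so its empty character data overwrites the already-decoded RSS value. The namespace-qualified tag works because the feed decoder assigns unadorned elements to an `rss` default namespace, the same mechanism the pre-existing `xml:"rss description"` tag relies on. A minimal, self-contained sketch of that behavior (the feed snippet and struct below are illustrative, not taken from the repository):

package main

import (
	"encoding/xml"
	"fmt"
	"strings"
)

// Illustrative only: a trimmed-down rssItem with the fixed tag.
type item struct {
	Link string `xml:"rss link"` // matches only the un-prefixed <link>
}

func main() {
	// Both link flavours, as in the feed that triggered the bug.
	data := `<item xmlns:atom="http://www.w3.org/2005/Atom">
		<link>https://example.com/article</link>
		<atom:link href="https://example.com/feed.xml"/>
	</item>`

	dec := xml.NewDecoder(strings.NewReader(data))
	// Unadorned elements are treated as belonging to the "rss"
	// namespace, so the `xml:"rss link"` tag can single them out.
	dec.DefaultSpace = "rss"

	var it item
	if err := dec.Decode(&it); err != nil {
		panic(err)
	}
	fmt.Println(it.Link) // https://example.com/article
}

With the plain `xml:"link"` tag and no DefaultSpace, the same program prints an empty string, which is the symptom reported for the NYT feed.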

View File

@@ -1,6 +1,8 @@
 package parser
 
 import (
+	"bufio"
+	"bytes"
 	"encoding/xml"
 	"io"
 	"regexp"
@@ -30,6 +32,81 @@ func plain2html(text string) string {
 func xmlDecoder(r io.Reader) *xml.Decoder {
 	decoder := xml.NewDecoder(r)
 	decoder.Strict = false
-	decoder.CharsetReader = charset.NewReaderLabel
+	decoder.CharsetReader = func(cs string, input io.Reader) (io.Reader, error) {
+		r, err := charset.NewReaderLabel(cs, input)
+		if err == nil {
+			r = NewSafeXMLReader(r)
+		}
+		return r, err
+	}
 	return decoder
 }
+
+type safexmlreader struct {
+	reader *bufio.Reader
+	buffer *bytes.Buffer
+}
+
+func NewSafeXMLReader(r io.Reader) io.Reader {
+	return &safexmlreader{
+		reader: bufio.NewReader(r),
+		buffer: bytes.NewBuffer(make([]byte, 0, 4096)),
+	}
+}
+
+func (xr *safexmlreader) Read(p []byte) (int, error) {
+	for xr.buffer.Len() < cap(p) {
+		r, _, err := xr.reader.ReadRune()
+		if err == io.EOF {
+			if xr.buffer.Len() == 0 {
+				return 0, io.EOF
+			}
+			break
+		}
+		if err != nil {
+			return 0, err
+		}
+		if isInCharacterRange(r) {
+			xr.buffer.WriteRune(r)
+		}
+	}
+	return xr.buffer.Read(p)
+}
+
+// NOTE: copied from "encoding/xml" package
+// Decide whether the given rune is in the XML Character Range, per
+// the Char production of https://www.xml.com/axml/testaxml.htm,
+// Section 2.2 Characters.
+func isInCharacterRange(r rune) (inrange bool) {
+	return r == 0x09 ||
+		r == 0x0A ||
+		r == 0x0D ||
+		r >= 0x20 && r <= 0xD7FF ||
+		r >= 0xE000 && r <= 0xFFFD ||
+		r >= 0x10000 && r <= 0x10FFFF
+}
+
+// NOTE: copied from "encoding/xml" package
+// procInst parses the `param="..."` or `param='...'`
+// value out of the provided string, returning "" if not found.
+func procInst(param, s string) string {
+	// TODO: this parsing is somewhat lame and not exact.
+	// It works for all actual cases, though.
+	param = param + "="
+	idx := strings.Index(s, param)
+	if idx == -1 {
+		return ""
+	}
+	v := s[idx+len(param):]
+	if v == "" {
+		return ""
+	}
+	if v[0] != '\'' && v[0] != '"' {
+		return ""
+	}
+	idx = strings.IndexRune(v[1:], rune(v[0]))
+	if idx == -1 {
+		return ""
+	}
+	return v[1 : idx+1]
+}
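
As a quick, hypothetical usage check (not part of the diff), this is what `procInst` extracts from the body of a typical XML declaration; `sniff` lowercases the result before storing it in `feedProbe.encoding`:

package parser

import "fmt"

// Illustrative only: procInst pulls a quoted pseudo-attribute value
// out of the body of an <?xml ... ?> processing instruction.
func Example_procInst() {
	fmt.Println(procInst("encoding", `version="1.0" encoding="windows-1251"`))
	// Output: windows-1251
}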

src/parser/util_test.go (new file, +88 lines)
View File

@@ -0,0 +1,88 @@
+package parser
+
+import (
+	"bytes"
+	"io"
+	"reflect"
+	"testing"
+)
+
+func TestSafeXMLReader(t *testing.T) {
+	var f io.Reader
+	want := []byte("привет мир")
+	f = bytes.NewReader(want)
+	f = NewSafeXMLReader(f)
+
+	have, err := io.ReadAll(f)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !reflect.DeepEqual(want, have) {
+		t.Fatalf("invalid output\nwant: %v\nhave: %v", want, have)
+	}
+}
+
+func TestSafeXMLReaderRemoveUnwantedRunes(t *testing.T) {
+	var f io.Reader
+	input := []byte("\aпривет \x0cмир\ufffe\uffff")
+	want := []byte("привет мир")
+	f = bytes.NewReader(input)
+	f = NewSafeXMLReader(f)
+
+	have, err := io.ReadAll(f)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !reflect.DeepEqual(want, have) {
+		t.Fatalf("invalid output\nwant: %v\nhave: %v", want, have)
+	}
+}
+
+func TestSafeXMLReaderPartial1(t *testing.T) {
+	var f io.Reader
+	input := []byte("\aпривет \x0cмир\ufffe\uffff")
+	want := []byte("привет мир")
+	f = bytes.NewReader(input)
+	f = NewSafeXMLReader(f)
+
+	buf := make([]byte, 1)
+	for i := 0; i < len(want); i++ {
+		n, err := f.Read(buf)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if n != 1 {
+			t.Fatalf("expected 1 byte, got %d", n)
+		}
+		if buf[0] != want[i] {
+			t.Fatalf("invalid char at pos %d\nwant: %v\nhave: %v", i, want[i], buf[0])
+		}
+	}
+	if x, err := f.Read(buf); err != io.EOF {
+		t.Fatalf("expected EOF, %v, %v %v", buf, x, err)
+	}
+}
+
+func TestSafeXMLReaderPartial2(t *testing.T) {
+	var f io.Reader
+	input := []byte("привет\a\a\a\a\a")
+	f = bytes.NewReader(input)
+	f = NewSafeXMLReader(f)
+
+	buf := make([]byte, 12)
+
+	n, err := f.Read(buf)
+	if err != nil {
+		t.Fatalf("unexpected error: %s", err)
+	}
+	if n != 12 {
+		t.Fatalf("expected 12 bytes")
+	}
+
+	n, err = f.Read(buf)
+	if n != 0 {
+		t.Fatalf("expected 0")
+	}
+	if err != io.EOF {
+		t.Fatalf("expected EOF, got %v", err)
+	}
+}

View File

@@ -3,6 +3,8 @@ package opml
 import (
 	"encoding/xml"
 	"io"
+
+	"golang.org/x/net/html/charset"
 )
 
 type opml struct {
@@ -45,6 +47,7 @@ func Parse(r io.Reader) (Folder, error) {
 	decoder := xml.NewDecoder(r)
 	decoder.Entity = xml.HTMLEntity
 	decoder.Strict = false
+	decoder.CharsetReader = charset.NewReaderLabel
 
 	err := decoder.Decode(&val)
 	if err != nil {

View File

@@ -1,6 +1,7 @@
 package opml
 
 import (
+	"os"
 	"reflect"
 	"strings"
 	"testing"
@@ -87,3 +88,41 @@ func TestParseFallback(t *testing.T) {
 		t.Fatal("invalid opml")
 	}
 }
+
+func TestParseWithEncoding(t *testing.T) {
+	file, err := os.Open("sample_win1251.xml")
+	if err != nil {
+		t.Fatal(err)
+	}
+	have, err := Parse(file)
+	if err != nil {
+		t.Fatal(err)
+	}
+	want := Folder{
+		Title: "",
+		Feeds: []Feed{
+			{
+				Title:   "пример1",
+				FeedUrl: "https://baz.com/feed.xml",
+				SiteUrl: "https://baz.com/",
+			},
+		},
+		Folders: []Folder{
+			{
+				Title: "папка",
+				Feeds: []Feed{
+					{
+						Title:   "пример2",
+						FeedUrl: "https://foo.com/feed.xml",
+						SiteUrl: "https://foo.com/",
+					},
+				},
+			},
+		},
+	}
+	if !reflect.DeepEqual(want, have) {
+		t.Logf("want: %#v", want)
+		t.Logf("have: %#v", have)
+		t.Fatal("invalid opml")
+	}
+}

View File

@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="windows-1251"?>
+<opml version="1.1">
+<head><title>…</title></head>
+<body>
+<outline text="папка">
+    <outline type="rss" text="пример2" description="пример2" xmlUrl="https://foo.com/feed.xml" htmlUrl="https://foo.com/"/>
+</outline>
+<outline type="rss" text="пример1" description="пример1" xmlUrl="https://baz.com/feed.xml" htmlUrl="https://baz.com/"/>
+</body>
+</opml>

View File

@@ -219,7 +219,11 @@ func (s *Server) handleFeedList(c *router.Context) {
 			result.FeedLink,
 			form.FolderID,
 		)
-		s.db.CreateItems(worker.ConvertItems(result.Feed.Items, *feed))
+		items := worker.ConvertItems(result.Feed.Items, *feed)
+		if len(items) > 0 {
+			s.db.CreateItems(items)
+			s.db.SetFeedSize(feed.Id, len(items))
+		}
 		s.worker.FindFeedFavicon(*feed)
 
 		c.JSON(http.StatusOK, map[string]interface{}{
@@ -457,14 +461,13 @@ func (s *Server) handlePageCrawl(c *router.Context) {
 		return
 	}
-	res, err := http.Get(url)
+	body, err := worker.GetBody(url)
 	if err != nil {
 		log.Print(err)
 		c.Out.WriteHeader(http.StatusBadRequest)
 		return
 	}
-	defer res.Body.Close()
-	content, err := readability.ExtractContent(res.Body)
+	content, err := readability.ExtractContent(strings.NewReader(body))
 	if err != nil {
 		log.Print(err)
 		c.Out.WriteHeader(http.StatusNoContent)

View File

@@ -194,3 +194,15 @@ func (s *Storage) GetFeedErrors() map[int64]string {
 	}
 	return errors
 }
+
+func (s *Storage) SetFeedSize(feedId int64, size int) {
+	_, err := s.db.Exec(`
+		insert into feed_sizes (feed_id, size)
+		values (?, ?)
+		on conflict (feed_id) do update set size = excluded.size`,
+		feedId, size,
+	)
+	if err != nil {
+		log.Print(err)
+	}
+}

View File

@@ -292,45 +292,70 @@ func (s *Storage) SyncSearch() {
 	}
 }
 
+var (
+	itemsKeepSize = 50
+	itemsKeepDays = 90
+)
+
+// Delete old articles from the database to cleanup space.
+//
+// The rules:
+//   * Never delete starred entries.
+//   * Keep at least the same amount of articles the feed provides (default: 50).
+//     This prevents from deleting items for rarely updated and/or ever-growing
+//     feeds which might eventually reappear as unread.
+//   * Keep entries for a certain period (default: 90 days).
 func (s *Storage) DeleteOldItems() {
-	rows, err := s.db.Query(fmt.Sprintf(`
-		select feed_id, count(*) as num_items
-		from items
-		where status != %d
-		group by feed_id
-		having num_items > 50
-	`, STARRED))
+	rows, err := s.db.Query(`
+		select
+			i.feed_id,
+			max(coalesce(s.size, 0), ?) as max_items,
+			count(*) as num_items
+		from items i
+		left outer join feed_sizes s on s.feed_id = i.feed_id
+		where status != ?
+		group by i.feed_id
+	`, itemsKeepSize, STARRED)
 	if err != nil {
 		log.Print(err)
 		return
 	}
 
-	feedIds := make([]int64, 0)
+	feedLimits := make(map[int64]int64, 0)
 	for rows.Next() {
-		var id int64
-		rows.Scan(&id, nil)
-		feedIds = append(feedIds, id)
+		var feedId, limit int64
+		rows.Scan(&feedId, &limit, nil)
+		feedLimits[feedId] = limit
 	}
 
-	for _, feedId := range feedIds {
+	for feedId, limit := range feedLimits {
 		result, err := s.db.Exec(`
-			delete from items where feed_id = ? and status != ? and date_arrived < ?`,
+			delete from items
+			where id in (
+				select i.id
+				from items i
+				where i.feed_id = ? and status != ?
+				order by date desc
+				limit -1 offset ?
+			) and date_arrived < ?
+			`,
 			feedId,
 			STARRED,
-			time.Now().Add(-time.Hour*24*90), // 90 days
+			limit,
+			time.Now().Add(-time.Hour*time.Duration(24*itemsKeepDays)),
 		)
 		if err != nil {
 			log.Print(err)
			return
 		}
-		num, err := result.RowsAffected()
+		numDeleted, err := result.RowsAffected()
 		if err != nil {
 			log.Print(err)
 			return
 		}
-		if num > 0 {
-			log.Printf("Deleted %d old items (%d)", num, feedId)
+		if numDeleted > 0 {
+			log.Printf("Deleted %d old items (feed: %d)", numDeleted, feedId)
 		}
 	}
 }
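
Two SQLite idioms above are easy to miss: the `feed_sizes` upsert in `SetFeedSize` depends on the unique constraint on `feed_id` (added by migration m07 below), and `limit -1 offset ?` works because a negative LIMIT in SQLite means "no limit", so the subquery selects every row except the N newest. A runnable sketch of both against a scratch in-memory database, assuming the `mattn/go-sqlite3` driver the project already uses (tables and values here are illustrative):

package main

import (
	"database/sql"
	"fmt"

	_ "github.com/mattn/go-sqlite3"
)

func main() {
	db, _ := sql.Open("sqlite3", ":memory:")
	defer db.Close()

	db.Exec(`create table feed_sizes (feed_id integer unique, size integer)`)
	db.Exec(`create table items (id integer primary key, feed_id integer, date integer)`)

	// Upsert: the second insert overwrites the first thanks to the
	// unique constraint on feed_id.
	upsert := `insert into feed_sizes (feed_id, size) values (?, ?)
	           on conflict (feed_id) do update set size = excluded.size`
	db.Exec(upsert, 1, 50)
	db.Exec(upsert, 1, 75)

	var size int
	db.QueryRow(`select size from feed_sizes where feed_id = 1`).Scan(&size)
	fmt.Println("size:", size) // size: 75

	for i := 1; i <= 5; i++ {
		db.Exec(`insert into items (feed_id, date) values (1, ?)`, i)
	}

	// Keep the 2 newest items, list the rest (deletion candidates).
	rows, _ := db.Query(`
		select id from items
		where feed_id = 1
		order by date desc
		limit -1 offset 2`)
	defer rows.Close()
	for rows.Next() {
		var id int
		rows.Scan(&id)
		fmt.Println("old item:", id) // old item: 3, then 2, then 1
	}
}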

View File

@@ -3,6 +3,7 @@ package storage
 import (
 	"log"
 	"reflect"
+	"strconv"
 	"testing"
 	"time"
 )
@@ -271,3 +272,59 @@ func TestMarkItemsRead(t *testing.T) {
 		t.Fail()
 	}
 }
+
+func TestDeleteOldItems(t *testing.T) {
+	extraItems := 10
+
+	now := time.Now()
+	db := testDB()
+	feed := db.CreateFeed("feed", "", "", "http://test.com/feed11.xml", nil)
+	items := make([]Item, 0)
+	for i := 0; i < itemsKeepSize+extraItems; i++ {
+		istr := strconv.Itoa(i)
+		items = append(items, Item{
+			GUID:   istr,
+			FeedId: feed.Id,
+			Title:  istr,
+			Date:   now.Add(time.Hour * time.Duration(i)),
+		})
+	}
+	db.CreateItems(items)
+	db.SetFeedSize(feed.Id, itemsKeepSize)
+
+	var feedSize int
+	err := db.db.QueryRow(
+		`select size from feed_sizes where feed_id = ?`, feed.Id,
+	).Scan(&feedSize)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if feedSize != itemsKeepSize {
+		t.Fatalf(
+			"expected feed size to get updated\nwant: %d\nhave: %d",
+			itemsKeepSize,
+			feedSize,
+		)
+	}
+
+	// expire only the first 3 articles
+	_, err = db.db.Exec(
+		`update items set date_arrived = ?
+		where id in (select id from items limit 3)`,
+		now.Add(-time.Hour*time.Duration(itemsKeepDays*24)),
+	)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	db.DeleteOldItems()
+
+	feedItems := db.ListItems(ItemFilter{FeedID: &feed.Id}, 1000, false)
+	if len(feedItems) != len(items)-3 {
+		t.Fatalf(
+			"invalid number of old items kept\nwant: %d\nhave: %d",
+			len(items)-3,
+			len(feedItems),
+		)
+	}
+}

View File

@@ -13,6 +13,7 @@ var migrations = []func(*sql.Tx) error{
 	m04_item_podcasturl,
 	m05_move_description_to_content,
 	m06_fill_missing_dates,
+	m07_add_feed_size,
 }
 
 var maxVersion = int64(len(migrations))
@@ -259,3 +260,14 @@ func m06_fill_missing_dates(tx *sql.Tx) error {
 	_, err := tx.Exec(sql)
 	return err
 }
+
+func m07_add_feed_size(tx *sql.Tx) error {
+	sql := `
+		create table if not exists feed_sizes (
+			feed_id references feeds(id) on delete cascade unique,
+			size integer not null default 0
+		);
+	`
+	_, err := tx.Exec(sql)
+	return err
+}

View File

@@ -11,6 +11,7 @@ func testDB() *Storage {
 	log.SetOutput(io.Discard)
 	db, _ := New(":memory:")
 	log.SetOutput(os.Stderr)
+	log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)
 	return db
 }

View File

@@ -9,3 +9,4 @@ hash:
 changes:
 -removed `getlantern/golog` dependency
+-prevent from compiling in linux

View File

@@ -1,3 +1,5 @@
+// +build darwin windows
+
 /*
 Package systray is a cross-platform Go library to place an icon and menu in the notification area.
 */

View File

@@ -1,3 +1,5 @@
+// +build never
+
 package systray
 
 /*

View File

@@ -1,9 +1,8 @@
-// +build !windows
+// +build darwin
 
 package systray
 
 /*
-#cgo linux pkg-config: gtk+-3.0 appindicator3-0.1
 #cgo darwin CFLAGS: -DDARWIN -x objective-c -fobjc-arc
 #cgo darwin LDFLAGS: -framework Cocoa

View File

@@ -6,10 +6,10 @@ import (
 	"fmt"
 	"io"
 	"io/ioutil"
+	"mime"
 	"net/http"
 	"net/url"
 	"strings"
-	"time"
 
 	"github.com/nkanaev/yarr/src/content/scraper"
 	"github.com/nkanaev/yarr/src/parser"
@@ -39,29 +39,32 @@ func DiscoverFeed(candidateUrl string) (*DiscoverResult, error) {
 	if res.StatusCode != 200 {
 		return nil, fmt.Errorf("status code %d", res.StatusCode)
 	}
+	cs := getCharset(res)
 
-	body, err := httpBody(res)
-	if err != nil {
-		return nil, err
-	}
-	content, err := ioutil.ReadAll(body)
+	body, err := io.ReadAll(res.Body)
 	if err != nil {
 		return nil, err
 	}
 
 	// Try to feed into parser
-	feed, err := parser.Parse(bytes.NewReader(content))
+	feed, err := parser.ParseAndFix(bytes.NewReader(body), candidateUrl, cs)
 	if err == nil {
-		feed.TranslateURLs(candidateUrl)
-		feed.SetMissingDatesTo(time.Now())
 		result.Feed = feed
 		result.FeedLink = candidateUrl
 		return result, nil
 	}
 
 	// Possibly an html link. Search for feed links
+	content := string(body)
+	if cs != "" {
+		if r, err := charset.NewReaderLabel(cs, bytes.NewReader(body)); err == nil {
+			if body, err := io.ReadAll(r); err == nil {
+				content = string(body)
+			}
+		}
+	}
 	sources := make([]FeedSource, 0)
-	for url, title := range scraper.FindFeeds(string(content), candidateUrl) {
+	for url, title := range scraper.FindFeeds(content, candidateUrl) {
 		sources = append(sources, FeedSource{Title: title, Url: url})
 	}
 	switch {
@@ -187,12 +190,7 @@ func listItems(f storage.Feed, db *storage.Storage) ([]storage.Item, error) {
 		return nil, nil
 	}
 
-	body, err := httpBody(res)
-	if err != nil {
-		return nil, err
-	}
-	feed, err := parser.Parse(body)
+	feed, err := parser.ParseAndFix(res.Body, f.FeedLink, getCharset(res))
 	if err != nil {
 		return nil, err
 	}
@@ -202,15 +200,42 @@ func listItems(f storage.Feed, db *storage.Storage) ([]storage.Item, error) {
 	if lmod != "" || etag != "" {
 		db.SetHTTPState(f.Id, lmod, etag)
 	}
-	feed.TranslateURLs(f.FeedLink)
-	feed.SetMissingDatesTo(time.Now())
 	return ConvertItems(feed.Items, f), nil
 }
 
-func httpBody(res *http.Response) (io.Reader, error) {
+func getCharset(res *http.Response) string {
+	contentType := res.Header.Get("Content-Type")
+	if _, params, err := mime.ParseMediaType(contentType); err == nil {
+		if cs, ok := params["charset"]; ok {
+			if e, _ := charset.Lookup(cs); e != nil {
+				return cs
+			}
+		}
+	}
+	return ""
+}
+
+func GetBody(url string) (string, error) {
+	res, err := client.get(url)
+	if err != nil {
+		return "", err
+	}
+	defer res.Body.Close()
+
+	var r io.Reader
 	ctype := res.Header.Get("Content-Type")
 	if strings.Contains(ctype, "charset") {
-		return charset.NewReader(res.Body, ctype)
+		r, err = charset.NewReader(res.Body, ctype)
+		if err != nil {
+			return "", err
+		}
+	} else {
+		r = res.Body
 	}
-	return res.Body, nil
+
+	body, err := io.ReadAll(r)
+	if err != nil {
+		return "", err
+	}
+	return string(body), nil
}
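
For reference, the two calls `getCharset` chains can be checked in isolation. A small sketch with illustrative header values: `mime.ParseMediaType` splits the Content-Type header into media type and parameters, and `charset.Lookup` returns a nil encoding for labels it does not know, which is what makes `getCharset` fall back to the empty string:

package main

import (
	"fmt"
	"mime"

	"golang.org/x/net/html/charset"
)

func main() {
	// Split a Content-Type header into media type and parameters.
	_, params, err := mime.ParseMediaType("text/html; charset=windows-1251")
	if err == nil {
		fmt.Println(params["charset"]) // windows-1251
	}

	// A known label yields a usable encoding...
	if e, name := charset.Lookup("windows-1251"); e != nil {
		fmt.Println(name) // windows-1251
	}
	// ...an unknown one yields nil, so getCharset would return "".
	if e, _ := charset.Lookup("bogus-charset"); e == nil {
		fmt.Println("unknown label")
	}
}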

View File

@@ -121,7 +121,11 @@ func (w *Worker) refresher(feeds []storage.Feed) {
 		srcqueue <- feed
 	}
 	for i := 0; i < len(feeds); i++ {
-		w.db.CreateItems(<-dstqueue)
+		items := <-dstqueue
+		if len(items) > 0 {
+			w.db.CreateItems(items)
+			w.db.SetFeedSize(items[0].FeedId, len(items))
+		}
 		atomic.AddInt32(w.pending, -1)
 		w.db.SyncSearch()
 	}