This commit is contained in:
Nazar Kanaev 2021-03-23 10:19:27 +00:00
parent 7d61f705bf
commit c91b439878
2 changed files with 69 additions and 7 deletions

View File

@ -8,6 +8,7 @@ import (
"io"
"net/url"
"strings"
"time"
)
var UnknownFormat = errors.New("unknown feed format")
@ -42,21 +43,43 @@ func sniff(lookup string) (string, processor) {
}
func Parse(r io.Reader) (*Feed, error) {
chunk := make([]byte, 1024)
if _, err := r.Read(chunk); err != nil {
lookup := make([]byte, 1024)
if _, err := r.Read(lookup); err != nil {
return nil, fmt.Errorf("Failed to read input: %s", err)
}
_, callback := sniff(string(chunk))
_, callback := sniff(string(lookup))
if callback == nil {
return nil, UnknownFormat
}
r = io.MultiReader(bytes.NewReader(chunk), r)
return callback(r)
feed, err := callback(io.MultiReader(bytes.NewReader(lookup), r))
if feed != nil {
feed.cleanup()
}
return feed, err
}
func FixURLs(feed *Feed, base string) error {
// cleanup strips leading and trailing whitespace from the feed's title and
// site URL, and from the GUID, URL, title and content of every item. The
// feed is modified in place.
func (feed *Feed) cleanup() {
	feed.Title, feed.SiteURL = strings.TrimSpace(feed.Title), strings.TrimSpace(feed.SiteURL)

	for idx := range feed.Items {
		// Work through a pointer so each field is trimmed directly in the slice.
		entry := &feed.Items[idx]
		entry.GUID = strings.TrimSpace(entry.GUID)
		entry.URL = strings.TrimSpace(entry.URL)
		entry.Title = strings.TrimSpace(entry.Title)
		entry.Content = strings.TrimSpace(entry.Content)
	}
}
// SetMissingDatesTo assigns newdate to every item whose Date equals
// defaultTime; items carrying any other date are left untouched.
// NOTE(review): defaultTime is declared outside this view — presumably the
// value parsers leave on an item when the feed supplies no date; confirm.
func (feed *Feed) SetMissingDatesTo(newdate time.Time) {
	for idx := range feed.Items {
		if entry := &feed.Items[idx]; entry.Date.Equal(defaultTime) {
			entry.Date = newdate
		}
	}
}
func (feed *Feed) TranslateURLs(base string) error {
baseUrl, err := url.Parse(base)
if err != nil {
return fmt.Errorf("failed to parse base url: %#v", base)

View File

@ -1,6 +1,10 @@
package feed
import "testing"
import (
"reflect"
"strings"
"testing"
)
func TestSniff(t *testing.T) {
testcases := [][2]string{
@ -34,3 +38,38 @@ func TestSniff(t *testing.T) {
}
}
}
// TestParse runs a minimal RSS 2.0 document through Parse and checks that
// the resulting feed has its title, item title and CDATA content extracted
// with surrounding whitespace removed.
func TestParse(t *testing.T) {
	have, err := Parse(strings.NewReader(`
<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>
Title
</title>
<item>
<title>
Item 1
</title>
<description>
<![CDATA[<div>content</div>]]>
</description>
</item>
</channel>
</rss>
`))
	// Surface parse failures directly instead of letting them show up as a
	// confusing nil-vs-struct mismatch in the comparison below.
	if err != nil {
		t.Fatalf("unexpected parse error: %s", err)
	}

	want := &Feed{
		Title: "Title",
		Items: []Item{
			{
				Title:   "Item 1",
				Content: "<div>content</div>",
			},
		},
	}
	if !reflect.DeepEqual(want, have) {
		t.Logf("want: %#v", want)
		t.Logf("have: %#v", have)
		t.Fatal("invalid content")
	}
}