mirror of
https://github.com/nkanaev/yarr.git
synced 2025-09-14 10:20:06 +00:00
rename packages
86
src/parser/atom.go
Normal file
@@ -0,0 +1,86 @@
// Atom 1.0 parser
package parser

import (
    "encoding/xml"
    "html"
    "io"
    "strings"
)

type atomFeed struct {
    XMLName xml.Name    `xml:"http://www.w3.org/2005/Atom feed"`
    ID      string      `xml:"id"`
    Title   atomText    `xml:"title"`
    Links   atomLinks   `xml:"link"`
    Entries []atomEntry `xml:"entry"`
}

type atomEntry struct {
    ID        string    `xml:"id"`
    Title     atomText  `xml:"title"`
    Summary   atomText  `xml:"summary"`
    Published string    `xml:"published"`
    Updated   string    `xml:"updated"`
    Links     atomLinks `xml:"link"`
    Content   atomText  `xml:"content"`
}

type atomText struct {
    Type string `xml:"type,attr"`
    Data string `xml:",chardata"`
    XML  string `xml:",innerxml"`
}

type atomLink struct {
    Href string `xml:"href,attr"`
    Rel  string `xml:"rel,attr"`
}

type atomLinks []atomLink

// String returns the text content, using the raw inner XML for xhtml payloads.
func (a *atomText) String() string {
    data := a.Data
    if a.Type == "xhtml" {
        data = a.XML
    }
    return html.UnescapeString(strings.TrimSpace(data))
}

// First returns the href of the first link carrying the given rel attribute.
func (links atomLinks) First(rel string) string {
    for _, l := range links {
        if l.Rel == rel {
            return l.Href
        }
    }
    return ""
}

func ParseAtom(r io.Reader) (*Feed, error) {
    srcfeed := atomFeed{}

    decoder := xmlDecoder(r)
    if err := decoder.Decode(&srcfeed); err != nil {
        return nil, err
    }

    dstfeed := &Feed{
        Title:   srcfeed.Title.String(),
        SiteURL: firstNonEmpty(srcfeed.Links.First("alternate"), srcfeed.Links.First("")),
    }
    for _, srcitem := range srcfeed.Entries {
        imageUrl := ""
        podcastUrl := ""

        dstfeed.Items = append(dstfeed.Items, Item{
            GUID:       srcitem.ID,
            Date:       dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
            URL:        firstNonEmpty(srcitem.Links.First("alternate"), srcitem.Links.First("")),
            Title:      srcitem.Title.String(),
            Content:    srcitem.Content.String(),
            ImageURL:   imageUrl,
            PodcastURL: podcastUrl,
        })
    }
    return dstfeed, nil
}
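A note on atomText: Atom delivers text constructs either as escaped character data (type="text" or "html") or as literal child markup (type="xhtml"), which is why the struct captures both `,chardata` and `,innerxml` and String() switches between them. A minimal standalone sketch of the difference (illustrative, not part of the commit; the names here are made up):

package main

import (
    "encoding/xml"
    "fmt"
)

// text mirrors atomText: Data collects character data, XML the raw markup.
type text struct {
    Type string `xml:"type,attr"`
    Data string `xml:",chardata"`
    XML  string `xml:",innerxml"`
}

func main() {
    var v text
    raw := `<content type="xhtml"><div><p>hi</p></div></content>`
    if err := xml.Unmarshal([]byte(raw), &v); err != nil {
        panic(err)
    }
    fmt.Printf("chardata: %q\n", v.Data) // text nested inside child elements is not captured here
    fmt.Printf("innerxml: %q\n", v.XML)  // markup intact: "<div><p>hi</p></div>"
}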
58
src/parser/atom_test.go
Normal file
@@ -0,0 +1,58 @@
package parser

import (
    "reflect"
    "strings"
    "testing"
    "time"
)

func TestAtom(t *testing.T) {
    have, _ := Parse(strings.NewReader(`
        <?xml version="1.0" encoding="utf-8"?>
        <feed xmlns="http://www.w3.org/2005/Atom">
            <title>Example Feed</title>
            <subtitle>A subtitle.</subtitle>
            <link href="http://example.org/feed/" rel="self" />
            <link href="http://example.org/" />
            <id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
            <updated>2003-12-13T18:30:02Z</updated>
            <entry>
                <title>Atom-Powered Robots Run Amok</title>
                <link href="http://example.org/2003/12/13/atom03" />
                <link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
                <link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
                <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
                <updated>2003-12-13T18:30:02Z</updated>
                <summary>Some text.</summary>
                <content type="xhtml">
                    <div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div>
                </content>
                <author>
                    <name>John Doe</name>
                    <email>johndoe@example.com</email>
                </author>
            </entry>
        </feed>
    `))
    want := &Feed{
        Title:   "Example Feed",
        SiteURL: "http://example.org/",
        Items: []Item{
            {
                GUID:       "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
                Date:       time.Unix(1071340202, 0).UTC(),
                URL:        "http://example.org/2003/12/13/atom03.html",
                Title:      "Atom-Powered Robots Run Amok",
                Content:    `<div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div>`,
                ImageURL:   "",
                PodcastURL: "",
            },
        },
    }
    if !reflect.DeepEqual(want, have) {
        t.Logf("want: %#v", want)
        t.Logf("have: %#v", have)
        t.Fatal("invalid atom")
    }
}
100
src/parser/feed.go
Normal file
@@ -0,0 +1,100 @@
package parser

import (
    "bytes"
    "encoding/xml"
    "errors"
    "fmt"
    "io"
    "net/url"
    "strings"
    "time"
)

var UnknownFormat = errors.New("unknown feed format")

type processor func(r io.Reader) (*Feed, error)

// sniff inspects the beginning of a document and picks the matching parser.
func sniff(lookup string) (string, processor) {
    lookup = strings.TrimSpace(lookup)
    if len(lookup) == 0 {
        return "", nil
    }
    switch lookup[0] {
    case '<':
        decoder := xmlDecoder(strings.NewReader(lookup))
        for {
            token, _ := decoder.Token()
            if token == nil {
                break
            }
            // The root element decides the format.
            if el, ok := token.(xml.StartElement); ok {
                switch el.Name.Local {
                case "rss":
                    return "rss", ParseRSS
                case "RDF":
                    return "rdf", ParseRDF
                case "feed":
                    return "atom", ParseAtom
                }
            }
        }
    case '{':
        return "json", ParseJSON
    }
    return "", nil
}

func Parse(r io.Reader) (*Feed, error) {
    lookup := make([]byte, 1024)
    n, err := r.Read(lookup)
    if err != nil && err != io.EOF {
        return nil, fmt.Errorf("failed to read input: %s", err)
    }
    // Truncate to what was actually read, so short feeds aren't NUL-padded.
    lookup = lookup[:n]

    _, callback := sniff(string(lookup))
    if callback == nil {
        return nil, UnknownFormat
    }

    // Stitch the sniffed prefix back onto the stream before parsing.
    feed, err := callback(io.MultiReader(bytes.NewReader(lookup), r))
    if feed != nil {
        feed.cleanup()
    }
    return feed, err
}

func (feed *Feed) cleanup() {
    feed.Title = strings.TrimSpace(feed.Title)
    feed.SiteURL = strings.TrimSpace(feed.SiteURL)
    for i, item := range feed.Items {
        feed.Items[i].GUID = strings.TrimSpace(item.GUID)
        feed.Items[i].URL = strings.TrimSpace(item.URL)
        feed.Items[i].Title = strings.TrimSpace(item.Title)
        feed.Items[i].Content = strings.TrimSpace(item.Content)
    }
}

func (feed *Feed) SetMissingDatesTo(newdate time.Time) {
    for i, item := range feed.Items {
        if item.Date.Equal(defaultTime) {
            feed.Items[i].Date = newdate
        }
    }
}

func (feed *Feed) TranslateURLs(base string) error {
    baseUrl, err := url.Parse(base)
    if err != nil {
        return fmt.Errorf("failed to parse base url: %#v", base)
    }
    siteUrl, err := url.Parse(feed.SiteURL)
    if err != nil {
        return fmt.Errorf("failed to parse feed url: %#v", feed.SiteURL)
    }
    feed.SiteURL = baseUrl.ResolveReference(siteUrl).String()
    for i, item := range feed.Items {
        itemUrl, err := url.Parse(item.URL)
        if err != nil {
            return fmt.Errorf("failed to parse item url: %#v", item.URL)
        }
        // Write back to the slice element; assigning to the loop copy is lost.
        feed.Items[i].URL = siteUrl.ResolveReference(itemUrl).String()
    }
    return nil
}
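Parse reads up to 1KB, sniffs the format from that prefix, then stitches the prefix back onto the remaining stream with io.MultiReader so the chosen parser sees the whole document. A hedged usage sketch (the import path below assumes this commit's src/parser layout; the feed URL is a placeholder):

package main

import (
    "fmt"
    "log"
    "net/http"

    "github.com/nkanaev/yarr/src/parser"
)

func main() {
    res, err := http.Get("https://example.org/feed.xml")
    if err != nil {
        log.Fatal(err)
    }
    defer res.Body.Close()

    // Parse sniffs rss/rdf/atom/json and dispatches automatically.
    feed, err := parser.Parse(res.Body)
    if err != nil {
        log.Fatal(err)
    }
    // Resolve relative item links against the address the feed was fetched from.
    if err := feed.TranslateURLs(res.Request.URL.String()); err != nil {
        log.Fatal(err)
    }
    fmt.Println(feed.Title, len(feed.Items))
}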
79
src/parser/feed_test.go
Normal file
@@ -0,0 +1,79 @@
package parser

import (
    "reflect"
    "strings"
    "testing"
)

func TestSniff(t *testing.T) {
    testcases := [][2]string{
        {
            `<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"></rdf:RDF>`,
            "rdf",
        },
        {
            `<?xml version="1.0" encoding="ISO-8859-1"?><rss version="2.0"><channel></channel></rss>`,
            "rss",
        },
        {
            `<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`,
            "rss",
        },
        {
            `<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`,
            "atom",
        },
        {
            `{}`,
            "json",
        },
        {
            `<!DOCTYPE html><html><head><title></title></head><body></body></html>`,
            "",
        },
    }
    for _, testcase := range testcases {
        have, _ := sniff(testcase[0])
        want := testcase[1]
        if want != have {
            t.Log(testcase[0])
            t.Errorf("Invalid format: want=%#v have=%#v", want, have)
        }
    }
}

func TestParse(t *testing.T) {
    have, _ := Parse(strings.NewReader(`
        <?xml version="1.0"?>
        <rss version="2.0">
            <channel>
                <title>
                    Title
                </title>
                <item>
                    <title>
                        Item 1
                    </title>
                    <description>
                        <![CDATA[<div>content</div>]]>
                    </description>
                </item>
            </channel>
        </rss>
    `))
    want := &Feed{
        Title: "Title",
        Items: []Item{
            {
                Title:   "Item 1",
                Content: "<div>content</div>",
            },
        },
    }
    if !reflect.DeepEqual(want, have) {
        t.Logf("want: %#v", want)
        t.Logf("have: %#v", have)
        t.Fatal("invalid content")
    }
}
57
src/parser/json.go
Normal file
@@ -0,0 +1,57 @@
// JSON Feed 1.0 parser
package parser

import (
    "encoding/json"
    "io"
)

type jsonFeed struct {
    Version string     `json:"version"`
    Title   string     `json:"title"`
    SiteURL string     `json:"home_page_url"`
    Items   []jsonItem `json:"items"`
}

type jsonItem struct {
    ID            string           `json:"id"`
    URL           string           `json:"url"`
    Title         string           `json:"title"`
    Summary       string           `json:"summary"`
    Text          string           `json:"content_text"`
    HTML          string           `json:"content_html"`
    DatePublished string           `json:"date_published"`
    DateModified  string           `json:"date_modified"`
    Attachments   []jsonAttachment `json:"attachments"`
}

type jsonAttachment struct {
    URL      string `json:"url"`
    MimeType string `json:"mime_type"`
    Title    string `json:"title"`
    Size     int64  `json:"size_in_bytes"`
    Duration int    `json:"duration_in_seconds"`
}

func ParseJSON(data io.Reader) (*Feed, error) {
    srcfeed := new(jsonFeed)
    decoder := json.NewDecoder(data)
    if err := decoder.Decode(srcfeed); err != nil {
        return nil, err
    }

    dstfeed := &Feed{
        Title:   srcfeed.Title,
        SiteURL: srcfeed.SiteURL,
    }
    for _, srcitem := range srcfeed.Items {
        dstfeed.Items = append(dstfeed.Items, Item{
            GUID:    srcitem.ID,
            Date:    dateParse(firstNonEmpty(srcitem.DatePublished, srcitem.DateModified)),
            URL:     srcitem.URL,
            Title:   srcitem.Title,
            Content: firstNonEmpty(srcitem.HTML, srcitem.Text, srcitem.Summary),
        })
    }
    return dstfeed, nil
}
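jsonAttachment is decoded but not yet used by ParseJSON, even though Item has a PodcastURL field. A hypothetical in-package helper (not in this commit) suggesting how audio attachments could be surfaced:

package parser

import "strings"

// podcastURL (illustrative only) returns the first audio attachment's URL.
func podcastURL(item jsonItem) string {
    for _, a := range item.Attachments {
        if strings.HasPrefix(a.MimeType, "audio/") {
            return a.URL
        }
    }
    return ""
}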
42
src/parser/json_test.go
Normal file
@@ -0,0 +1,42 @@
package parser

import (
    "reflect"
    "strings"
    "testing"
)

func TestJSONFeed(t *testing.T) {
    have, _ := Parse(strings.NewReader(`{
        "version": "https://jsonfeed.org/version/1",
        "title": "My Example Feed",
        "home_page_url": "https://example.org/",
        "feed_url": "https://example.org/feed.json",
        "items": [
            {
                "id": "2",
                "content_text": "This is a second item.",
                "url": "https://example.org/second-item"
            },
            {
                "id": "1",
                "content_html": "<p>Hello, world!</p>",
                "url": "https://example.org/initial-post"
            }
        ]
    }`))
    want := &Feed{
        Title:   "My Example Feed",
        SiteURL: "https://example.org/",
        Items: []Item{
            {GUID: "2", Content: "This is a second item.", URL: "https://example.org/second-item"},
            {GUID: "1", Content: "<p>Hello, world!</p>", URL: "https://example.org/initial-post"},
        },
    }

    if !reflect.DeepEqual(want, have) {
        t.Logf("want: %#v", want)
        t.Logf("have: %#v", have)
        t.Fatal("invalid json")
    }
}
20
src/parser/models.go
Normal file
@@ -0,0 +1,20 @@
package parser

import "time"

type Feed struct {
    Title   string
    SiteURL string
    Items   []Item
}

type Item struct {
    GUID  string
    Date  time.Time
    URL   string
    Title string

    Content    string
    ImageURL   string
    PodcastURL string
}
47
src/parser/rdf.go
Normal file
@@ -0,0 +1,47 @@
// Parser for RSS versions:
// - 0.90
// - 1.0
package parser

import (
    "encoding/xml"
    "io"
)

type rdfFeed struct {
    XMLName xml.Name  `xml:"RDF"`
    Title   string    `xml:"channel>title"`
    Link    string    `xml:"channel>link"`
    Items   []rdfItem `xml:"item"`
}

type rdfItem struct {
    Title       string `xml:"title"`
    Link        string `xml:"link"`
    Description string `xml:"description"`

    DublinCoreDate    string `xml:"http://purl.org/dc/elements/1.1/ date"`
    DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
}

func ParseRDF(r io.Reader) (*Feed, error) {
    srcfeed := rdfFeed{}

    decoder := xmlDecoder(r)
    if err := decoder.Decode(&srcfeed); err != nil {
        return nil, err
    }

    dstfeed := &Feed{
        Title:   srcfeed.Title,
        SiteURL: srcfeed.Link,
    }
    for _, srcitem := range srcfeed.Items {
        dstfeed.Items = append(dstfeed.Items, Item{
            GUID:  srcitem.Link,
            URL:   srcitem.Link,
            Title: srcitem.Title,
        })
    }
    return dstfeed, nil
}
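Note that rdfItem declares Dublin Core date and content fields that ParseRDF does not yet copy into the output. Wiring them up might look like the following sketch (illustrative only, not part of this commit):

package parser

// rdfItemToItem (hypothetical) also maps the declared-but-unused RDF fields.
func rdfItemToItem(srcitem rdfItem) Item {
    return Item{
        GUID:    srcitem.Link,
        Date:    dateParse(srcitem.DublinCoreDate),
        URL:     srcitem.Link,
        Title:   srcitem.Title,
        Content: firstNonEmpty(srcitem.DublinCoreContent, srcitem.Description),
    }
}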
54
src/parser/rdf_test.go
Normal file
@@ -0,0 +1,54 @@
package parser

import (
    "reflect"
    "strings"
    "testing"
)

func TestRDFFeed(t *testing.T) {
    have, _ := Parse(strings.NewReader(`<?xml version="1.0"?>
        <rdf:RDF
            xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
            xmlns="http://channel.netscape.com/rdf/simple/0.9/">

            <channel>
                <title>Mozilla Dot Org</title>
                <link>http://www.mozilla.org</link>
                <description>the Mozilla Organization
                    web site</description>
            </channel>

            <image>
                <title>Mozilla</title>
                <url>http://www.mozilla.org/images/moz.gif</url>
                <link>http://www.mozilla.org</link>
            </image>

            <item>
                <title>New Status Updates</title>
                <link>http://www.mozilla.org/status/</link>
            </item>

            <item>
                <title>Bugzilla Reorganized</title>
                <link>http://www.mozilla.org/bugs/</link>
            </item>

        </rdf:RDF>
    `))
    want := &Feed{
        Title:   "Mozilla Dot Org",
        SiteURL: "http://www.mozilla.org",
        Items: []Item{
            {GUID: "http://www.mozilla.org/status/", URL: "http://www.mozilla.org/status/", Title: "New Status Updates"},
            {GUID: "http://www.mozilla.org/bugs/", URL: "http://www.mozilla.org/bugs/", Title: "Bugzilla Reorganized"},
        },
    }

    if !reflect.DeepEqual(want, have) {
        t.Logf("want: %#v", want)
        t.Logf("have: %#v", have)
        t.Fatal("invalid rdf")
    }
}
81
src/parser/rss.go
Normal file
@@ -0,0 +1,81 @@
// Parser for RSS versions:
// - 0.91 netscape
// - 0.91 userland
// - 2.0
package parser

import (
    "encoding/xml"
    "io"
)

type rssFeed struct {
    XMLName xml.Name  `xml:"rss"`
    Version string    `xml:"version,attr"`
    Title   string    `xml:"channel>title"`
    Link    string    `xml:"channel>link"`
    Items   []rssItem `xml:"channel>item"`
}

type rssItem struct {
    GUID           string         `xml:"guid"`
    Title          string         `xml:"title"`
    Link           string         `xml:"link"`
    Description    string         `xml:"rss description"`
    PubDate        string         `xml:"pubDate"`
    EnclosureLinks []rssEnclosure `xml:"enclosure"`

    DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`
    ContentEncoded string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`

    FeedBurnerLink          string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
    FeedBurnerEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`

    ItunesSubtitle    string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd subtitle"`
    ItunesSummary     string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd summary"`
    GoogleDescription string `xml:"http://www.google.com/schemas/play-podcasts/1.0 description"`
}

type rssLink struct {
    XMLName xml.Name
    Data    string `xml:",chardata"`
    Href    string `xml:"href,attr"`
    Rel     string `xml:"rel,attr"`
}

type rssTitle struct {
    XMLName xml.Name
    Data    string `xml:",chardata"`
    Inner   string `xml:",innerxml"`
}

type rssEnclosure struct {
    URL    string `xml:"url,attr"`
    Type   string `xml:"type,attr"`
    Length string `xml:"length,attr"`
}

func ParseRSS(r io.Reader) (*Feed, error) {
    srcfeed := rssFeed{}

    decoder := xmlDecoder(r)
    decoder.DefaultSpace = "rss"
    if err := decoder.Decode(&srcfeed); err != nil {
        return nil, err
    }

    dstfeed := &Feed{
        Title:   srcfeed.Title,
        SiteURL: srcfeed.Link,
    }
    for _, srcitem := range srcfeed.Items {
        dstfeed.Items = append(dstfeed.Items, Item{
            GUID:    firstNonEmpty(srcitem.GUID, srcitem.Link),
            Date:    dateParse(firstNonEmpty(srcitem.DublinCoreDate, srcitem.PubDate)),
            URL:     srcitem.Link,
            Title:   srcitem.Title,
            Content: srcitem.Description,
        })
    }
    return dstfeed, nil
}
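The decoder.DefaultSpace = "rss" line is the key trick here: encoding/xml assigns unprefixed elements that fake namespace, so the tag `xml:"rss description"` matches only the plain <description> element and cannot collide with namespaced ones such as content:encoded or the iTunes fields. A standalone demo (illustrative, not part of the commit):

package main

import (
    "encoding/xml"
    "fmt"
    "strings"
)

type demoItem struct {
    Description string `xml:"rss description"`
    Encoded     string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
}

func main() {
    doc := `<item xmlns:content="http://purl.org/rss/1.0/modules/content/">
        <description>plain</description>
        <content:encoded>rich</content:encoded>
    </item>`
    decoder := xml.NewDecoder(strings.NewReader(doc))
    decoder.DefaultSpace = "rss" // unadorned tags are treated as namespace "rss"
    var v demoItem
    if err := decoder.Decode(&v); err != nil {
        panic(err)
    }
    fmt.Println(v.Description, v.Encoded) // plain rich
}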
56
src/parser/rss_test.go
Normal file
@@ -0,0 +1,56 @@
package parser

import (
    "reflect"
    "strings"
    "testing"
)

func TestRSSFeed(t *testing.T) {
    have, _ := Parse(strings.NewReader(`
        <?xml version="1.0"?>
        <!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
        <rss version="0.91">
            <channel>
                <language>en</language>
                <description>???</description>
                <link>http://www.scripting.com/</link>
                <title>Scripting News</title>
                <item>
                    <title>Title 1</title>
                    <link>http://www.scripting.com/one/</link>
                    <description>Description 1</description>
                </item>
                <item>
                    <title>Title 2</title>
                    <link>http://www.scripting.com/two/</link>
                    <description>Description 2</description>
                </item>
            </channel>
        </rss>
    `))
    want := &Feed{
        Title:   "Scripting News",
        SiteURL: "http://www.scripting.com/",
        Items: []Item{
            {
                GUID:    "http://www.scripting.com/one/",
                URL:     "http://www.scripting.com/one/",
                Title:   "Title 1",
                Content: "Description 1",
            },
            {
                GUID:    "http://www.scripting.com/two/",
                URL:     "http://www.scripting.com/two/",
                Title:   "Title 2",
                Content: "Description 2",
            },
        },
    }

    if !reflect.DeepEqual(want, have) {
        t.Logf("want: %#v", want)
        t.Logf("have: %#v", have)
        t.Fatal("invalid rss")
    }
}
239
src/parser/utils.go
Normal file
@@ -0,0 +1,239 @@
package parser

import (
    "encoding/xml"
    "io"
    "time"

    "golang.org/x/net/html/charset"
)

// firstNonEmpty returns the first non-empty string among vals.
func firstNonEmpty(vals ...string) string {
    for _, val := range vals {
        if len(val) > 0 {
            return val
        }
    }
    return ""
}

// xmlDecoder builds a lenient decoder that copes with non-UTF-8 charsets.
func xmlDecoder(r io.Reader) *xml.Decoder {
    decoder := xml.NewDecoder(r)
    decoder.Strict = false
    decoder.CharsetReader = charset.NewReaderLabel
    return decoder
}

// taken from github.com/mjibson/goread
var dateFormats = []string{
    time.RFC822,  // RSS
    time.RFC822Z, // RSS
    time.RFC3339, // Atom
    time.UnixDate,
    time.RubyDate,
    time.RFC850,
    time.RFC1123Z,
    time.RFC1123,
    time.ANSIC,
    "Mon, 02 Jan 2006 15:04:05 MST -07:00",
    "Mon, January 2, 2006, 3:04 PM MST",
    "Mon, January 2 2006 15:04:05 -0700",
    "Mon, January 02, 2006, 15:04:05 MST",
    "Mon, January 02, 2006 15:04:05 MST",
    "Mon, Jan 2, 2006 15:04 MST",
    "Mon, Jan 2 2006 15:04 MST",
    "Mon, Jan 2 2006 15:04:05 MST",
    "Mon, Jan 2, 2006 15:04:05 MST",
    "Mon, Jan 2 2006 15:04:05 -700",
    "Mon, Jan 2 2006 15:04:05 -0700",
    "Mon Jan 2 15:04 2006",
    "Mon Jan 2 15:04:05 2006 MST",
    "Mon Jan 02, 2006 3:04 pm",
    "Mon, Jan 02,2006 15:04:05 MST",
    "Mon Jan 02 2006 15:04:05 -0700",
    "Mon, 02/01/2006",
    "Monday, 2. January 2006 - 15:04",
    "Monday 02 January 2006",
    "Monday, January 2, 2006 15:04:05 MST",
    "Monday, January 2, 2006 03:04 PM",
    "Monday, January 2, 2006",
    "Monday, January 02, 2006",
    "Monday, 2 January 2006 15:04:05 MST",
    "Monday, 2 January 2006 15:04:05 -0700",
    "Monday, 2 Jan 2006 15:04:05 MST",
    "Monday, 2 Jan 2006 15:04:05 -0700",
    "Monday, 02 January 2006 15:04:05 MST",
    "Monday, 02 January 2006 15:04:05 -0700",
    "Monday, 02 January 2006 15:04:05",
    "Monday, January 02, 2006 - 3:04pm",
    "Monday, January 2, 2006 - 3:04pm",
    "Mon, 01/02/2006 - 15:04",
    "Mon, 2 January 2006 15:04 MST",
    "Mon, 2 January 2006, 15:04 -0700",
    "Mon, 2 January 2006, 15:04:05 MST",
    "Mon, 2 January 2006 15:04:05 MST",
    "Mon, 2 January 2006 15:04:05 -0700",
    "Mon, 2 January 2006",
    "Mon, 2 Jan 2006 3:04:05 PM -0700",
    "Mon, 2 Jan 2006 15:4:5 MST",
    "Mon, 2 Jan 2006 15:4:5 -0700 GMT",
    "Mon, 2, Jan 2006 15:4",
    "Mon, 2 Jan 2006 15:04 MST",
    "Mon, 2 Jan 2006, 15:04 -0700",
    "Mon, 2 Jan 2006 15:04 -0700",
    "Mon, 2 Jan 2006 15:04:05 UT",
    "Mon, 2 Jan 2006 15:04:05MST",
    "Mon, 2 Jan 2006 15:04:05 MST",
    "Mon 2 Jan 2006 15:04:05 MST",
    "mon,2 Jan 2006 15:04:05 MST",
    "Mon, 2 Jan 2006 15:04:05 -0700 MST",
    "Mon, 2 Jan 2006 15:04:05-0700",
    "Mon, 2 Jan 2006 15:04:05 -0700",
    "Mon, 2 Jan 2006 15:04:05",
    "Mon, 2 Jan 2006 15:04",
    "Mon, 02 Jan 2006, 15:04",
    "Mon, 2 Jan 2006, 15:04",
    "Mon,2 Jan 2006",
    "Mon, 2 Jan 2006",
    "Mon, 2 Jan 15:04:05 MST",
    "Mon, 2 Jan 06 15:04:05 MST",
    "Mon, 2 Jan 06 15:04:05 -0700",
    "Mon, 2006-01-02 15:04",
    "Mon,02 January 2006 15:04:05 MST",
    "Mon, 02 January 2006",
    "Mon, 02 Jan 2006 3:04:05 PM MST",
    "Mon, 02 Jan 2006 15 -0700",
    "Mon,02 Jan 2006 15:04 MST",
    "Mon, 02 Jan 2006 15:04 MST",
    "Mon, 02 Jan 2006 15:04 -0700",
    "Mon, 02 Jan 2006 15:04:05 Z",
    "Mon, 02 Jan 2006 15:04:05 UT",
    "Mon, 02 Jan 2006 15:04:05 MST-07:00",
    "Mon, 02 Jan 2006 15:04:05 MST -0700",
    "Mon, 02 Jan 2006, 15:04:05 MST",
    "Mon, 02 Jan 2006 15:04:05MST",
    "Mon, 02 Jan 2006 15:04:05 MST",
    "Mon , 02 Jan 2006 15:04:05 MST",
    "Mon, 02 Jan 2006 15:04:05 GMT-0700",
    "Mon,02 Jan 2006 15:04:05 -0700",
    "Mon, 02 Jan 2006 15:04:05 -0700",
    "Mon, 02 Jan 2006 15:04:05 -07:00",
    "Mon, 02 Jan 2006 15:04:05 --0700",
    "Mon 02 Jan 2006 15:04:05 -0700",
    "Mon 02 Jan 2006, 15:04:05 MST",
    "Mon, 02 Jan 2006 15:04:05 -07",
    "Mon, 02 Jan 2006 15:04:05 00",
    "Mon, 02 Jan 2006 15:04:05",
    "Mon, 02 Jan 2006",
    "Mon, 02 Jan 06 15:04:05 MST",
    "Mon, 02 Jan 2006 3:04 PM MST",
    "Mon Jan 02 2006 15:04:05 MST",
    "Mon, 01 02 2006 15:04:05 -0700",
    "Mon, 2th Jan 2006 15:04:05 MST",
    "Jan. 2, 2006, 3:04 a.m.",
    "fri, 02 jan 2006 15:04:05 -0700",
    "January 02 2006 03:04:05 PM",
    "January 2, 2006 3:04 PM",
    "January 2, 2006, 3:04 p.m.",
    "January 2, 2006 15:04:05 MST",
    "January 2, 2006 15:04:05",
    "January 2, 2006 03:04 PM",
    "January 2, 2006",
    "January 02, 2006 15:04:05 MST",
    "January 02, 2006 15:04",
    "January 02, 2006 03:04 PM",
    "January 02, 2006",
    "Jan 2, 2006 3:04:05 PM MST",
    "Jan 2, 2006 3:04:05 PM",
    "Jan 2, 2006 15:04:05 MST",
    "Jan 2, 2006",
    "Jan 02 2006 03:04:05PM",
    "Jan 02, 2006",
    "6/1/2 15:04",
    "6-1-2 15:04",
    "2 January 2006 15:04:05 MST",
    "2 January 2006 15:04:05 -0700",
    "2 January 2006",
    "2 Jan 2006 15:04:05 Z",
    "2 Jan 2006 15:04:05 MST",
    "2 Jan 2006 15:04:05 -0700",
    "2 Jan 2006",
    "2 Jan 2006 15:04 MST",
    "2.1.2006 15:04:05",
    "2/1/2006",
    "2-1-2006",
    "2006 January 02",
    "2006-1-2T15:04:05Z",
    "2006-1-2 15:04:05",
    "2006-1-2",
    "2006-01-02T15:04:05-07:00Z",
    "2006-1-02T15:04:05Z",
    "2006-01-02T15:04Z",
    "2006-01-02T15:04-07:00",
    "2006-01-02T15:04:05Z",
    "2006-01-02T15:04:05-07:00:00",
    "2006-01-02T15:04:05:-0700",
    "2006-01-02T15:04:05-0700",
    "2006-01-02T15:04:05-07:00",
    "2006-01-02T15:04:05 -0700",
    "2006-01-02T15:04:05:00",
    "2006-01-02T15:04:05",
    "2006-01-02T15:04",
    "2006-01-02 at 15:04:05",
    "2006-01-02 15:04:05Z",
    "2006-01-02 15:04:05 MST",
    "2006-01-02 15:04:05-0700",
    "2006-01-02 15:04:05-07:00",
    "2006-01-02 15:04:05 -0700",
    "2006-01-02 15:04",
    "2006-01-02 00:00:00.0 15:04:05.0 -0700",
    "2006/01/02",
    "2006-01-02",
    "15:04 02.01.2006 -0700",
    "1/2/2006 3:04 PM MST",
    "1/2/2006 3:04:05 PM MST",
    "1/2/2006 3:04:05 PM",
    "1/2/2006 15:04:05 MST",
    "1/2/2006",
    "06/1/2 15:04",
    "06-1-2 15:04",
    "02 Monday, Jan 2006 15:04",
    "02 Jan 2006 15:04 MST",
    "02 Jan 2006 15:04:05 UT",
    "02 Jan 2006 15:04:05 MST",
    "02 Jan 2006 15:04:05 -0700",
    "02 Jan 2006 15:04:05",
    "02 Jan 2006",
    "02/01/2006 15:04 MST",
    "02-01-2006 15:04:05 MST",
    "02.01.2006 15:04:05",
    "02/01/2006 15:04:05",
    "02.01.2006 15:04",
    "02/01/2006 - 15:04",
    "02.01.2006 -0700",
    "02/01/2006",
    "02-01-2006",
    "01/02/2006 3:04 PM",
    "01/02/2006 15:04:05 MST",
    "01/02/2006 - 15:04",
    "01/02/2006",
    "01-02-2006",
    "Jan. 2006",
    "Jan. 2, 2006, 03:04 p.m.",
    "2006-01-02 15:04:05 -07:00",
    "2 January, 2006",
}

var defaultTime = time.Time{}

// dateParse tries every known layout in order and falls back to the zero time.
func dateParse(line string) time.Time {
    if line == "" {
        return defaultTime
    }
    for _, layout := range dateFormats {
        if t, err := time.Parse(layout, line); err == nil {
            return t
        }
    }
    return defaultTime
}
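dateParse is deliberately forgiving: it walks the whole layout list and quietly returns the zero time on failure, so callers such as Feed.SetMissingDatesTo can backfill missing dates later. A small in-package sketch of both paths (illustrative, not part of the commit):

package parser

import (
    "fmt"
    "time"
)

// dateParseDemo (hypothetical) shows the match and the fallback paths.
func dateParseDemo() {
    fmt.Println(dateParse("Tue, 10 Mar 2020 17:00:00 GMT")) // matches time.RFC1123
    fmt.Println(dateParse("not a date").Equal(time.Time{})) // true: zero-time fallback
}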