mirror of
				https://github.com/nkanaev/yarr.git
				synced 2025-10-31 06:53:30 +00:00 
			
		
		
		
	basic atom 1.0 parser
This commit is contained in:
		
							
								
								
									
										87
									
								
								src/feed/atom.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								src/feed/atom.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,87 @@ | |||||||
|  | package feed | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"encoding/xml" | ||||||
|  | 	"html" | ||||||
|  | 	"io" | ||||||
|  | 	"strings" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | type atomFeed struct { | ||||||
|  | 	XMLName xml.Name    `xml:"http://www.w3.org/2005/Atom feed"` | ||||||
|  | 	ID      string      `xml:"id"` | ||||||
|  | 	Title   atomText    `xml:"title"` | ||||||
|  | 	Links   atomLinks   `xml:"link"` | ||||||
|  | 	Entries []atomEntry `xml:"entry"` | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type atomEntry struct { | ||||||
|  | 	ID        string    `xml:"id"` | ||||||
|  | 	Title     atomText  `xml:"title"` | ||||||
|  | 	Summary   atomText  `xml:"summary"` | ||||||
|  | 	Published string    `xml:"published"` | ||||||
|  | 	Updated   string    `xml:"updated"` | ||||||
|  | 	Links     atomLinks `xml:"link"` | ||||||
|  | 	Content   atomText  `xml:"content"` | ||||||
|  | } | ||||||
|  |  | ||||||
type (
	// atomText maps an Atom text construct (title, summary, content).
	// Data receives the element's character data and XML its raw inner
	// markup; Type is the value of the "type" attribute.
	atomText struct {
		Type string `xml:"type,attr"`
		Data string `xml:",chardata"`
		XML  string `xml:",innerxml"`
	}

	// atomLink maps a <link> element's href and rel attributes.
	atomLink struct {
		Href string `xml:"href,attr"`
		Rel  string `xml:"rel,attr"`
	}

	// atomLinks is the list of <link> elements found on a feed or entry.
	atomLinks []atomLink
)

// String returns the text of the construct with surrounding whitespace
// trimmed and HTML entities unescaped. For type "xhtml" the raw inner
// markup is used as the source; for every other type the character data is.
func (a *atomText) String() string {
	var raw string
	if a.Type == "xhtml" {
		raw = a.XML
	} else {
		raw = a.Data
	}
	trimmed := strings.TrimSpace(raw)
	return html.UnescapeString(trimmed)
}

// First returns the href of the first link whose rel attribute equals rel
// exactly (an empty rel matches links that carry no rel attribute). It
// returns "" when no link matches.
func (ls atomLinks) First(rel string) string {
	for i := range ls {
		if link := ls[i]; link.Rel == rel {
			return link.Href
		}
	}
	return ""
}
|  |  | ||||||
|  | func ParseAtom(r io.Reader) (*Feed, error) { | ||||||
|  | 	f := atomFeed{} | ||||||
|  |  | ||||||
|  | 	decoder := xml.NewDecoder(r) | ||||||
|  | 	if err := decoder.Decode(&f); err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	feed := &Feed{ | ||||||
|  | 		Title:   f.Title.String(), | ||||||
|  | 		SiteURL: first(f.Links.First("alternate"), f.Links.First("")), | ||||||
|  | 		FeedURL: f.Links.First("self"), | ||||||
|  | 	} | ||||||
|  | 	for _, e := range f.Entries { | ||||||
|  | 		date, _ := dateParse(first(e.Published, e.Updated)) | ||||||
|  | 		imageUrl := "" | ||||||
|  | 		podcastUrl := "" | ||||||
|  |  | ||||||
|  | 		feed.Items = append(feed.Items, Item{ | ||||||
|  | 			GUID:       first(e.ID), | ||||||
|  | 			Date:       date, | ||||||
|  | 			URL:        first(e.Links.First("alternate"), f.Links.First("")), | ||||||
|  | 			Title:      e.Title.String(), | ||||||
|  | 			Content:    e.Content.String(), | ||||||
|  | 			ImageURL:   imageUrl, | ||||||
|  | 			PodcastURL: podcastUrl, | ||||||
|  | 		}) | ||||||
|  | 	} | ||||||
|  | 	return feed, nil | ||||||
|  | } | ||||||
							
								
								
									
										59
									
								
								src/feed/atom_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								src/feed/atom_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,59 @@ | |||||||
|  | package feed | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"reflect" | ||||||
|  | 	"strings" | ||||||
|  | 	"testing" | ||||||
|  | 	"time" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | func TestAtom(t *testing.T) { | ||||||
|  | 	have, _ := ParseAtom(strings.NewReader(` | ||||||
|  | 		<?xml version="1.0" encoding="utf-8"?> | ||||||
|  | 		<feed xmlns="http://www.w3.org/2005/Atom"> | ||||||
|  | 			<title>Example Feed</title> | ||||||
|  | 			<subtitle>A subtitle.</subtitle> | ||||||
|  | 			<link href="http://example.org/feed/" rel="self" /> | ||||||
|  | 			<link href="http://example.org/" /> | ||||||
|  | 			<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id> | ||||||
|  | 			<updated>2003-12-13T18:30:02Z</updated> | ||||||
|  | 			<entry> | ||||||
|  | 				<title>Atom-Powered Robots Run Amok</title> | ||||||
|  | 				<link href="http://example.org/2003/12/13/atom03" /> | ||||||
|  | 				<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/> | ||||||
|  | 				<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/> | ||||||
|  | 				<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id> | ||||||
|  | 				<updated>2003-12-13T18:30:02Z</updated> | ||||||
|  | 				<summary>Some text.</summary> | ||||||
|  | 				<content type="xhtml"> | ||||||
|  | 					<div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div> | ||||||
|  | 				</content> | ||||||
|  | 				<author> | ||||||
|  | 					<name>John Doe</name> | ||||||
|  | 					<email>johndoe@example.com</email> | ||||||
|  | 				</author> | ||||||
|  | 			</entry> | ||||||
|  | 		</feed> | ||||||
|  | 	`)) | ||||||
|  | 	want := &Feed{ | ||||||
|  | 		Title:   "Example Feed", | ||||||
|  | 		SiteURL: "http://example.org/", | ||||||
|  | 		FeedURL: "http://example.org/feed/", | ||||||
|  | 		Items: []Item{ | ||||||
|  | 			{ | ||||||
|  | 				GUID:       "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a", | ||||||
|  | 				Date:       time.Unix(1071340202, 0).UTC(), | ||||||
|  | 				URL:        "http://example.org/2003/12/13/atom03.html", | ||||||
|  | 				Title:      "Atom-Powered Robots Run Amok", | ||||||
|  | 				Content:    `<div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div>`, | ||||||
|  | 				ImageURL:   "", | ||||||
|  | 				PodcastURL: "", | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 	} | ||||||
|  | 	if !reflect.DeepEqual(want, have) { | ||||||
|  | 		t.Logf("want: %#v", want) | ||||||
|  | 		t.Logf("have: %#v", have) | ||||||
|  | 		t.Fatal("invalid atom") | ||||||
|  | 	} | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user