diff --git a/src/htmlutil/query.go b/src/content/htmlutil/query.go
similarity index 100%
rename from src/htmlutil/query.go
rename to src/content/htmlutil/query.go
diff --git a/src/htmlutil/query_test.go b/src/content/htmlutil/query_test.go
similarity index 100%
rename from src/htmlutil/query_test.go
rename to src/content/htmlutil/query_test.go
diff --git a/src/scraper/utils.go b/src/content/htmlutil/urlutils.go
similarity index 70%
rename from src/scraper/utils.go
rename to src/content/htmlutil/urlutils.go
index 6db70f8..f5c02ac 100644
--- a/src/scraper/utils.go
+++ b/src/content/htmlutil/urlutils.go
@@ -1,10 +1,10 @@
-package scraper
+package htmlutil
import (
"net/url"
)
-func any(els []string, el string, match func(string, string) bool) bool {
+func Any(els []string, el string, match func(string, string) bool) bool {
for _, x := range els {
if match(x, el) {
return true
@@ -13,7 +13,7 @@ func any(els []string, el string, match func(string, string) bool) bool {
return false
}
-func absoluteUrl(href, base string) string {
+func AbsoluteUrl(href, base string) string {
baseUrl, err := url.Parse(base)
if err != nil {
return ""
@@ -25,7 +25,7 @@ func absoluteUrl(href, base string) string {
return baseUrl.ResolveReference(hrefUrl).String()
}
-func urlDomain(val string) string {
+func URLDomain(val string) string {
if u, err := url.Parse(val); err == nil {
return u.Host
}
diff --git a/src/htmlutil/utils.go b/src/content/htmlutil/utils.go
similarity index 100%
rename from src/htmlutil/utils.go
rename to src/content/htmlutil/utils.go
diff --git a/src/reader/LICENSE b/src/content/readability/LICENSE
similarity index 100%
rename from src/reader/LICENSE
rename to src/content/readability/LICENSE
diff --git a/src/reader/readability.go b/src/content/readability/readability.go
similarity index 99%
rename from src/reader/readability.go
rename to src/content/readability/readability.go
index 992a5e7..37b7304 100644
--- a/src/reader/readability.go
+++ b/src/content/readability/readability.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
-package reader
+package readability
import (
"bytes"
@@ -13,7 +13,7 @@ import (
"regexp"
"strings"
- "github.com/nkanaev/yarr/src/htmlutil"
+ "github.com/nkanaev/yarr/src/content/htmlutil"
"golang.org/x/net/html"
)
diff --git a/src/scraper/sanitizer.go b/src/content/sanitizer/sanitizer.go
similarity index 97%
rename from src/scraper/sanitizer.go
rename to src/content/sanitizer/sanitizer.go
index 1f81009..32b3ce1 100644
--- a/src/scraper/sanitizer.go
+++ b/src/content/sanitizer/sanitizer.go
@@ -12,6 +12,7 @@ import (
"strconv"
"strings"
+ "github.com/nkanaev/yarr/src/content/htmlutil"
"golang.org/x/net/html"
)
@@ -116,7 +117,7 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
} else if tagName == "img" && attribute.Key == "src" && isValidDataAttribute(attribute.Val) {
value = attribute.Val
} else {
- value = absoluteUrl(value, baseURL)
+ value = htmlutil.AbsoluteUrl(value, baseURL)
if value == "" {
continue
}
@@ -294,9 +295,9 @@ func isValidIframeSource(baseURL, src string) bool {
"www.youtube.com",
}
- domain := urlDomain(src)
+ domain := htmlutil.URLDomain(src)
// allow iframe from same origin
- if urlDomain(baseURL) == domain {
+ if htmlutil.URLDomain(baseURL) == domain {
return true
}
@@ -416,7 +417,7 @@ func sanitizeSrcsetAttr(baseURL, value string) string {
if nbParts > 0 {
sanitizedSource := parts[0]
if !strings.HasPrefix(parts[0], "data:") {
- sanitizedSource = absoluteUrl(parts[0], baseURL)
+ sanitizedSource = htmlutil.AbsoluteUrl(parts[0], baseURL)
if sanitizedSource == "" {
continue
}
diff --git a/src/scraper/sanitizer_test.go b/src/content/sanitizer/sanitizer_test.go
similarity index 100%
rename from src/scraper/sanitizer_test.go
rename to src/content/sanitizer/sanitizer_test.go
diff --git a/src/scraper/finder.go b/src/content/scraper/finder.go
similarity index 90%
rename from src/scraper/finder.go
rename to src/content/scraper/finder.go
index 96faf58..fa1a9af 100644
--- a/src/scraper/finder.go
+++ b/src/content/scraper/finder.go
@@ -3,7 +3,7 @@ package scraper
import (
"strings"
- "github.com/nkanaev/yarr/src/htmlutil"
+ "github.com/nkanaev/yarr/src/content/htmlutil"
"golang.org/x/net/html"
)
@@ -32,7 +32,7 @@ func FindFeeds(body string, base string) map[string]string {
for _, node := range htmlutil.FindNodes(doc, isFeedLink) {
href := htmlutil.Attr(node, "href")
name := htmlutil.Attr(node, "title")
- link := absoluteUrl(href, base)
+ link := htmlutil.AbsoluteUrl(href, base)
if link != "" {
candidates[link] = name
}
@@ -63,7 +63,7 @@ func FindFeeds(body string, base string) map[string]string {
}
for _, node := range htmlutil.FindNodes(doc, isFeedHyperLink) {
href := htmlutil.Attr(node, "href")
- link := absoluteUrl(href, base)
+ link := htmlutil.AbsoluteUrl(href, base)
if link != "" {
candidates[link] = ""
}
@@ -89,7 +89,7 @@ func FindIcons(body string, base string) []string {
rels := strings.Split(htmlutil.Attr(node, "rel"), " ")
for _, rel := range rels {
if strings.EqualFold(rel, "icon") {
- icons = append(icons, absoluteUrl(htmlutil.Attr(node, "href"), base))
+ icons = append(icons, htmlutil.AbsoluteUrl(htmlutil.Attr(node, "href"), base))
}
}
}
diff --git a/src/scraper/finder_test.go b/src/content/scraper/finder_test.go
similarity index 100%
rename from src/scraper/finder_test.go
rename to src/content/scraper/finder_test.go
diff --git a/src/server/routes.go b/src/server/routes.go
index 2f9034b..0d236d3 100644
--- a/src/server/routes.go
+++ b/src/server/routes.go
@@ -9,10 +9,10 @@ import (
"github.com/nkanaev/yarr/src/assets"
"github.com/nkanaev/yarr/src/auth"
+ "github.com/nkanaev/yarr/src/content/readability"
+ "github.com/nkanaev/yarr/src/content/sanitizer"
"github.com/nkanaev/yarr/src/opml"
- "github.com/nkanaev/yarr/src/reader"
"github.com/nkanaev/yarr/src/router"
- "github.com/nkanaev/yarr/src/scraper"
"github.com/nkanaev/yarr/src/storage"
"github.com/nkanaev/yarr/src/worker"
)
@@ -419,7 +419,7 @@ func (s *Server) handlePageCrawl(c *router.Context) {
return
}
defer res.Body.Close()
- content, err := reader.ExtractContent(res.Body)
+ content, err := readability.ExtractContent(res.Body)
if err != nil {
log.Print(err)
c.Out.WriteHeader(http.StatusNoContent)
diff --git a/src/worker/crawler.go b/src/worker/crawler.go
index b05a2c6..508d3cd 100644
--- a/src/worker/crawler.go
+++ b/src/worker/crawler.go
@@ -8,7 +8,7 @@ import (
"net/http"
"net/url"
- "github.com/nkanaev/yarr/src/scraper"
+ "github.com/nkanaev/yarr/src/content/scraper"
"github.com/nkanaev/yarr/src/parser"
"github.com/nkanaev/yarr/src/storage"
"golang.org/x/net/html/charset"