populate search table

This commit is contained in:
Nazar Kanaev 2020-07-15 00:21:44 +01:00
parent 9d10d3a7d4
commit 3c4d48fdc7
4 changed files with 62 additions and 2 deletions

View File

@ -1,3 +1,3 @@
#!/bin/sh #!/bin/sh
CGO_ENABLED=1 go build -tags sqlite_foreign_keys CGO_ENABLED=1 go build -tags "sqlite_foreign_keys sqlite_fts5"

1
go.mod
View File

@ -6,4 +6,5 @@ require (
github.com/PuerkitoBio/goquery v1.5.1 github.com/PuerkitoBio/goquery v1.5.1
github.com/mattn/go-sqlite3 v1.14.0 github.com/mattn/go-sqlite3 v1.14.0
github.com/mmcdole/gofeed v1.0.0 github.com/mmcdole/gofeed v1.0.0
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e
) )

View File

@ -37,7 +37,8 @@ func (h *Handler) startJobs() {
h.queueSize += val h.queueSize += val
} }
}() }()
h.fetchAllFeeds() go h.db.SyncSearch()
//h.fetchAllFeeds()
} }
func (h *Handler) fetchFeed(feed storage.Feed) { func (h *Handler) fetchFeed(feed storage.Feed) {

View File

@ -5,6 +5,7 @@ import (
"time" "time"
"strings" "strings"
"encoding/json" "encoding/json"
"golang.org/x/net/html"
) )
type ItemStatus int type ItemStatus int
@ -241,3 +242,60 @@ func (s *Storage) FeedStats() []FeedStat {
} }
return result return result
} }
// HTMLText returns the text content of the HTML fragment s with all markup
// removed. Each text token is trimmed of surrounding whitespace, empty
// tokens are dropped, and the remaining pieces are joined with single spaces.
// Tokenizing stops at the first error token, which includes the normal end
// of input.
func HTMLText(s string) string {
	tokenizer := html.NewTokenizer(strings.NewReader(s))
	var contents []string
	for {
		switch tokenizer.Next() {
		case html.ErrorToken:
			return strings.Join(contents, " ")
		case html.TextToken:
			// Tokenizer.Text already returns the unescaped text, so it must
			// not be passed through html.UnescapeString again: a second
			// pass would turn literal entity text such as "&amp;lt;" into
			// "<" instead of "&lt;".
			if content := strings.TrimSpace(string(tokenizer.Text())); content != "" {
				contents = append(contents, content)
			}
		}
	}
}
// SyncSearch adds every item that has no search_rowid yet to the search
// table and records the new search row id on the item.
//
// Title is inserted as is; description and content are reduced to plain text
// with HTMLText first. All pending items are read before any write is
// issued. Errors are logged through s.log; a failed insert aborts the sync,
// while a failure to link a single item is logged and the sync moves on.
//
// NOTE(review): the insert and the follow-up update are two separate
// statements, so a failed update leaves a search row that no item points to
// and the item will be inserted again on the next sync. Consider wrapping
// the pair in a transaction.
func (s *Storage) SyncSearch() {
	rows, err := s.db.Query(`
		select id, title, content, description
		from items
		where search_rowid is null;
	`)
	if err != nil {
		s.log.Print(err)
		return
	}
	defer rows.Close()

	var items []Item
	for rows.Next() {
		var item Item
		if err := rows.Scan(&item.Id, &item.Title, &item.Content, &item.Description); err != nil {
			// Skip rows that could not be decoded rather than indexing a
			// partially populated item.
			s.log.Print(err)
			continue
		}
		items = append(items, item)
	}
	if err := rows.Err(); err != nil {
		// Iteration stopped early; still index whatever was read.
		s.log.Print(err)
	}

	for _, item := range items {
		result, err := s.db.Exec(`
			insert into search (title, description, content) values (?, ?, ?)`,
			item.Title, HTMLText(item.Description), HTMLText(item.Content),
		)
		if err != nil {
			s.log.Print(err)
			return
		}
		numrows, err := result.RowsAffected()
		if err != nil {
			s.log.Print(err)
			continue
		}
		if numrows != 1 {
			continue
		}
		rowId, err := result.LastInsertId()
		if err != nil {
			s.log.Print(err)
			continue
		}
		if _, err := s.db.Exec(
			`update items set search_rowid = ? where id = ?`,
			rowId, item.Id,
		); err != nil {
			s.log.Print(err)
		}
	}
}