mirror of
https://github.com/nkanaev/yarr.git
synced 2026-06-09 18:03:19 +00:00
switch to fts5, rework search syncing
This commit is contained in:
2
makefile
2
makefile
@@ -1,7 +1,7 @@
|
|||||||
VERSION=2.6
|
VERSION=2.6
|
||||||
GITHASH=$(shell git rev-parse --short=8 HEAD)
|
GITHASH=$(shell git rev-parse --short=8 HEAD)
|
||||||
|
|
||||||
GO_TAGS = sqlite_foreign_keys sqlite_json
|
GO_TAGS = sqlite_foreign_keys sqlite_json sqlite_fts5
|
||||||
GO_LDFLAGS = -s -w -X 'main.Version=$(VERSION)' -X 'main.GitHash=$(GITHASH)'
|
GO_LDFLAGS = -s -w -X 'main.Version=$(VERSION)' -X 'main.GitHash=$(GITHASH)'
|
||||||
|
|
||||||
GO_FLAGS = -tags "$(GO_TAGS)" -ldflags="$(GO_LDFLAGS)"
|
GO_FLAGS = -tags "$(GO_TAGS)" -ldflags="$(GO_LDFLAGS)"
|
||||||
|
|||||||
@@ -257,7 +257,6 @@ func (s *Server) handleFeedList(c *router.Context) {
|
|||||||
items := worker.ConvertItems(result.Feed.Items, *feed)
|
items := worker.ConvertItems(result.Feed.Items, *feed)
|
||||||
if len(items) > 0 {
|
if len(items) > 0 {
|
||||||
s.db.CreateItems(items)
|
s.db.CreateItems(items)
|
||||||
s.db.SyncSearch()
|
|
||||||
}
|
}
|
||||||
s.worker.FindFeedFavicon(*feed)
|
s.worker.FindFeedFavicon(*feed)
|
||||||
|
|
||||||
|
|||||||
@@ -9,8 +9,6 @@ import (
|
|||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/nkanaev/yarr/src/content/htmlutil"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type ItemStatus int
|
type ItemStatus int
|
||||||
@@ -195,7 +193,7 @@ func listQueryPredicate(filter ItemFilter, newestFirst bool) (string, []any) {
|
|||||||
|
|
||||||
cond = append(
|
cond = append(
|
||||||
cond,
|
cond,
|
||||||
"i.search_rowid in (select rowid from search where search match :search)",
|
"i.id in (select rowid as id from search where search match :search)",
|
||||||
)
|
)
|
||||||
args = append(args, sql.Named("search", strings.Join(terms, " ")))
|
args = append(args, sql.Named("search", strings.Join(terms, " ")))
|
||||||
}
|
}
|
||||||
@@ -379,46 +377,6 @@ func (s *Storage) FeedStats() []FeedStat {
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Storage) SyncSearch() {
|
|
||||||
rows, err := s.db.Query(`
|
|
||||||
select id, title, content
|
|
||||||
from items
|
|
||||||
where search_rowid is null;
|
|
||||||
`)
|
|
||||||
if err != nil {
|
|
||||||
log.Print(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
items := make([]Item, 0)
|
|
||||||
for rows.Next() {
|
|
||||||
var item Item
|
|
||||||
rows.Scan(&item.Id, &item.Title, &item.Content)
|
|
||||||
items = append(items, item)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, item := range items {
|
|
||||||
result, err := s.db.Exec(`
|
|
||||||
insert into search (title, description, content) values (:title, "", :content)`,
|
|
||||||
sql.Named("title", item.Title),
|
|
||||||
sql.Named("content", htmlutil.ExtractText(item.Content)),
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
log.Print(err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if numrows, err := result.RowsAffected(); err == nil && numrows == 1 {
|
|
||||||
if rowId, err := result.LastInsertId(); err == nil {
|
|
||||||
s.db.Exec(
|
|
||||||
`update items set search_rowid = :search_rowid where id = :id`,
|
|
||||||
sql.Named("search_rowid", rowId),
|
|
||||||
sql.Named("id", item.Id),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
var (
|
||||||
itemsKeepSize = 50
|
itemsKeepSize = 50
|
||||||
itemsKeepDays = 90
|
itemsKeepDays = 90
|
||||||
|
|||||||
@@ -212,7 +212,6 @@ func TestListItems(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// filter by search
|
// filter by search
|
||||||
db.SyncSearch()
|
|
||||||
search1 := "title111"
|
search1 := "title111"
|
||||||
have = getItemGuids(db.ListItems(ItemFilter{Search: &search1}, 4, true, false))
|
have = getItemGuids(db.ListItems(ItemFilter{Search: &search1}, 4, true, false))
|
||||||
want = []string{"item111"}
|
want = []string{"item111"}
|
||||||
@@ -432,3 +431,85 @@ func TestCreateItemsLastArrived(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSearch(t *testing.T) {
|
||||||
|
db := testDB()
|
||||||
|
defer db.Close()
|
||||||
|
feed := db.CreateFeed(CreateFeedParams{Title: "f", FeedLink: "http://f.xml"})
|
||||||
|
|
||||||
|
db.CreateItems([]Item{
|
||||||
|
{
|
||||||
|
GUID: "i1",
|
||||||
|
FeedId: feed.Id,
|
||||||
|
Title: "Hello World",
|
||||||
|
Content: "This is a <b>test</b> of the <i>emergency</i> broadcast system.",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
GUID: "i2",
|
||||||
|
FeedId: feed.Id,
|
||||||
|
Title: "FTS5 Unicode",
|
||||||
|
Content: "Unicode support with characters like: Привет, 世界, 🚀",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
GUID: "i3",
|
||||||
|
FeedId: feed.Id,
|
||||||
|
Title: "Hidden Tag",
|
||||||
|
Content: `<div class="secret-class">Don't find me by my class name</div>`,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
// 1. Basic search
|
||||||
|
s1 := "emergency"
|
||||||
|
have := getItemGuids(db.ListItems(ItemFilter{Search: &s1}, 10, true, false))
|
||||||
|
if !reflect.DeepEqual(have, []string{"i1"}) {
|
||||||
|
t.Errorf("basic search failed: expected [i1], got %v", have)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. HTML stripping: Should find text, but NOT the tags
|
||||||
|
s2 := "test"
|
||||||
|
have = getItemGuids(db.ListItems(ItemFilter{Search: &s2}, 10, true, false))
|
||||||
|
if !reflect.DeepEqual(have, []string{"i1"}) {
|
||||||
|
t.Errorf("html text search failed: expected [i1], got %v", have)
|
||||||
|
}
|
||||||
|
|
||||||
|
s3 := "secret-class"
|
||||||
|
have = getItemGuids(db.ListItems(ItemFilter{Search: &s3}, 10, true, false))
|
||||||
|
if len(have) > 0 {
|
||||||
|
t.Errorf("html tag search should have failed but found: %v", have)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Multi-word (AND)
|
||||||
|
s4 := "broadcast system"
|
||||||
|
have = getItemGuids(db.ListItems(ItemFilter{Search: &s4}, 10, true, false))
|
||||||
|
if !reflect.DeepEqual(have, []string{"i1"}) {
|
||||||
|
t.Errorf("multi-word search failed: expected [i1], got %v", have)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Unicode
|
||||||
|
s5 := "Привет"
|
||||||
|
have = getItemGuids(db.ListItems(ItemFilter{Search: &s5}, 10, true, false))
|
||||||
|
if !reflect.DeepEqual(have, []string{"i2"}) {
|
||||||
|
t.Errorf("unicode search failed: expected [i2], got %v", have)
|
||||||
|
}
|
||||||
|
|
||||||
|
s6 := "世界"
|
||||||
|
have = getItemGuids(db.ListItems(ItemFilter{Search: &s6}, 10, true, false))
|
||||||
|
if !reflect.DeepEqual(have, []string{"i2"}) {
|
||||||
|
t.Errorf("unicode search (CJK) failed: expected [i2], got %v", have)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 5. Trigger: Update
|
||||||
|
db.db.Exec("update items set title = 'Updated Title' where guid = 'i1'")
|
||||||
|
s7 := "Updated"
|
||||||
|
have = getItemGuids(db.ListItems(ItemFilter{Search: &s7}, 10, true, false))
|
||||||
|
if !reflect.DeepEqual(have, []string{"i1"}) {
|
||||||
|
t.Errorf("update trigger failed: expected [i1], got %v", have)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 6. Trigger: Delete
|
||||||
|
db.db.Exec("delete from items where guid = 'i1'")
|
||||||
|
have = getItemGuids(db.ListItems(ItemFilter{Search: &s7}, 10, true, false))
|
||||||
|
if len(have) > 0 {
|
||||||
|
t.Errorf("delete trigger failed: found deleted item: %v", have)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ var migrations = []func(*sql.Tx) error{
|
|||||||
m11_add_item_last_arrived,
|
m11_add_item_last_arrived,
|
||||||
m12_remove_feed_sizes,
|
m12_remove_feed_sizes,
|
||||||
m13_consolidate_feed_states,
|
m13_consolidate_feed_states,
|
||||||
|
m14_upgrade_fts5,
|
||||||
}
|
}
|
||||||
|
|
||||||
var maxVersion = int64(len(migrations))
|
var maxVersion = int64(len(migrations))
|
||||||
@@ -382,3 +383,40 @@ func m13_consolidate_feed_states(tx *sql.Tx) error {
|
|||||||
_, err := tx.Exec(sql)
|
_, err := tx.Exec(sql)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func m14_upgrade_fts5(tx *sql.Tx) error {
|
||||||
|
sql := `
|
||||||
|
-- 1. Drop old FTS4 table and trigger
|
||||||
|
drop table if exists search;
|
||||||
|
drop trigger if exists del_item_search;
|
||||||
|
|
||||||
|
-- 2. Remove search_rowid from items
|
||||||
|
drop index if exists idx_item_search_rowid;
|
||||||
|
alter table items drop column search_rowid;
|
||||||
|
|
||||||
|
-- 3. Create FTS5 virtual table
|
||||||
|
create virtual table search using fts5(
|
||||||
|
title, content,
|
||||||
|
content='items',
|
||||||
|
content_rowid='id',
|
||||||
|
tokenize='unicode61'
|
||||||
|
);
|
||||||
|
|
||||||
|
-- 4. Create triggers for automatic FTS sync
|
||||||
|
create trigger items_ai after insert on items begin
|
||||||
|
insert into search(rowid, title, content) values (new.id, new.title, strip_html(new.content));
|
||||||
|
end;
|
||||||
|
create trigger items_ad after delete on items begin
|
||||||
|
insert into search(search, rowid, title, content) values('delete', old.id, old.title, strip_html(old.content));
|
||||||
|
end;
|
||||||
|
create trigger items_au after update on items begin
|
||||||
|
insert into search(search, rowid, title, content) values('delete', old.id, old.title, strip_html(old.content));
|
||||||
|
insert into search(rowid, title, content) values (new.id, new.title, strip_html(new.content));
|
||||||
|
end;
|
||||||
|
|
||||||
|
-- 5. Populate FTS5 table with existing data
|
||||||
|
insert into search(rowid, title, content) select id, title, strip_html(content) from items;
|
||||||
|
`
|
||||||
|
_, err := tx.Exec(sql)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|||||||
@@ -5,9 +5,18 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
_ "github.com/mattn/go-sqlite3"
|
"github.com/mattn/go-sqlite3"
|
||||||
|
"github.com/nkanaev/yarr/src/content/htmlutil"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
sql.Register("sqlite3_yarr", &sqlite3.SQLiteDriver{
|
||||||
|
ConnectHook: func(conn *sqlite3.SQLiteConn) error {
|
||||||
|
return conn.RegisterFunc("strip_html", htmlutil.ExtractText, true)
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
type Storage struct {
|
type Storage struct {
|
||||||
db *sql.DB
|
db *sql.DB
|
||||||
}
|
}
|
||||||
@@ -28,7 +37,7 @@ func New(path string) (*Storage, error) {
|
|||||||
path = path + "?" + params
|
path = path + "?" + params
|
||||||
}
|
}
|
||||||
|
|
||||||
db, err := sql.Open("sqlite3", path)
|
db, err := sql.Open("sqlite3_yarr", path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -118,7 +118,6 @@ func (w *Worker) refresher(feeds []storage.Feed) {
|
|||||||
w.db.CreateItems(items)
|
w.db.CreateItems(items)
|
||||||
}
|
}
|
||||||
atomic.AddInt32(w.pending, -1)
|
atomic.AddInt32(w.pending, -1)
|
||||||
w.db.SyncSearch()
|
|
||||||
}
|
}
|
||||||
close(srcqueue)
|
close(srcqueue)
|
||||||
close(dstqueue)
|
close(dstqueue)
|
||||||
|
|||||||
Reference in New Issue
Block a user