From 138b5ad9917954efdb483cb8538188fdc656c47a Mon Sep 17 00:00:00 2001 From: nkanaev Date: Wed, 3 Jun 2026 13:56:02 +0100 Subject: [PATCH] switch to fts5, rework search syncing --- makefile | 2 +- src/server/routes.go | 1 - src/storage/item.go | 44 +-------------------- src/storage/item_test.go | 83 +++++++++++++++++++++++++++++++++++++++- src/storage/migration.go | 38 ++++++++++++++++++ src/storage/storage.go | 13 ++++++- src/worker/worker.go | 1 - 7 files changed, 133 insertions(+), 49 deletions(-) diff --git a/makefile b/makefile index 2448b21..d4892b4 100644 --- a/makefile +++ b/makefile @@ -1,7 +1,7 @@ VERSION=2.6 GITHASH=$(shell git rev-parse --short=8 HEAD) -GO_TAGS = sqlite_foreign_keys sqlite_json +GO_TAGS = sqlite_foreign_keys sqlite_json sqlite_fts5 GO_LDFLAGS = -s -w -X 'main.Version=$(VERSION)' -X 'main.GitHash=$(GITHASH)' GO_FLAGS = -tags "$(GO_TAGS)" -ldflags="$(GO_LDFLAGS)" diff --git a/src/server/routes.go b/src/server/routes.go index 985da91..764cbd8 100644 --- a/src/server/routes.go +++ b/src/server/routes.go @@ -257,7 +257,6 @@ func (s *Server) handleFeedList(c *router.Context) { items := worker.ConvertItems(result.Feed.Items, *feed) if len(items) > 0 { s.db.CreateItems(items) - s.db.SyncSearch() } s.worker.FindFeedFavicon(*feed) diff --git a/src/storage/item.go b/src/storage/item.go index 282e413..71b290f 100644 --- a/src/storage/item.go +++ b/src/storage/item.go @@ -9,8 +9,6 @@ import ( "sort" "strings" "time" - - "github.com/nkanaev/yarr/src/content/htmlutil" ) type ItemStatus int @@ -195,7 +193,7 @@ func listQueryPredicate(filter ItemFilter, newestFirst bool) (string, []any) { cond = append( cond, - "i.search_rowid in (select rowid from search where search match :search)", + "i.id in (select rowid as id from search where search match :search)", ) args = append(args, sql.Named("search", strings.Join(terms, " "))) } @@ -379,46 +377,6 @@ func (s *Storage) FeedStats() []FeedStat { return result } -func (s *Storage) SyncSearch() { - rows, err := 
s.db.Query(` - select id, title, content - from items - where search_rowid is null; - `) - if err != nil { - log.Print(err) - return - } - - items := make([]Item, 0) - for rows.Next() { - var item Item - rows.Scan(&item.Id, &item.Title, &item.Content) - items = append(items, item) - } - - for _, item := range items { - result, err := s.db.Exec(` - insert into search (title, description, content) values (:title, "", :content)`, - sql.Named("title", item.Title), - sql.Named("content", htmlutil.ExtractText(item.Content)), - ) - if err != nil { - log.Print(err) - return - } - if numrows, err := result.RowsAffected(); err == nil && numrows == 1 { - if rowId, err := result.LastInsertId(); err == nil { - s.db.Exec( - `update items set search_rowid = :search_rowid where id = :id`, - sql.Named("search_rowid", rowId), - sql.Named("id", item.Id), - ) - } - } - } -} - var ( itemsKeepSize = 50 itemsKeepDays = 90 diff --git a/src/storage/item_test.go b/src/storage/item_test.go index 7d76c20..9f3a8f0 100644 --- a/src/storage/item_test.go +++ b/src/storage/item_test.go @@ -212,7 +212,6 @@ func TestListItems(t *testing.T) { } // filter by search - db.SyncSearch() search1 := "title111" have = getItemGuids(db.ListItems(ItemFilter{Search: &search1}, 4, true, false)) want = []string{"item111"} @@ -432,3 +431,85 @@ func TestCreateItemsLastArrived(t *testing.T) { } }) } + +func TestSearch(t *testing.T) { + db := testDB() + defer db.Close() + feed := db.CreateFeed(CreateFeedParams{Title: "f", FeedLink: "http://f.xml"}) + + db.CreateItems([]Item{ + { + GUID: "i1", + FeedId: feed.Id, + Title: "Hello World", + Content: "This is a test of the emergency broadcast system.", + }, + { + GUID: "i2", + FeedId: feed.Id, + Title: "FTS5 Unicode", + Content: "Unicode support with characters like: Привет, 世界, 🚀", + }, + { + GUID: "i3", + FeedId: feed.Id, + Title: "Hidden Tag", + Content: `
<div class="secret-class">Don't find me by my class name</div>
`, + }, + }) + + // 1. Basic search + s1 := "emergency" + have := getItemGuids(db.ListItems(ItemFilter{Search: &s1}, 10, true, false)) + if !reflect.DeepEqual(have, []string{"i1"}) { + t.Errorf("basic search failed: expected [i1], got %v", have) + } + + // 2. HTML stripping: Should find text, but NOT the tags + s2 := "test" + have = getItemGuids(db.ListItems(ItemFilter{Search: &s2}, 10, true, false)) + if !reflect.DeepEqual(have, []string{"i1"}) { + t.Errorf("html text search failed: expected [i1], got %v", have) + } + + s3 := "secret-class" + have = getItemGuids(db.ListItems(ItemFilter{Search: &s3}, 10, true, false)) + if len(have) > 0 { + t.Errorf("html tag search should have failed but found: %v", have) + } + + // 3. Multi-word (AND) + s4 := "broadcast system" + have = getItemGuids(db.ListItems(ItemFilter{Search: &s4}, 10, true, false)) + if !reflect.DeepEqual(have, []string{"i1"}) { + t.Errorf("multi-word search failed: expected [i1], got %v", have) + } + + // 4. Unicode + s5 := "Привет" + have = getItemGuids(db.ListItems(ItemFilter{Search: &s5}, 10, true, false)) + if !reflect.DeepEqual(have, []string{"i2"}) { + t.Errorf("unicode search failed: expected [i2], got %v", have) + } + + s6 := "世界" + have = getItemGuids(db.ListItems(ItemFilter{Search: &s6}, 10, true, false)) + if !reflect.DeepEqual(have, []string{"i2"}) { + t.Errorf("unicode search (CJK) failed: expected [i2], got %v", have) + } + + // 5. Trigger: Update + db.db.Exec("update items set title = 'Updated Title' where guid = 'i1'") + s7 := "Updated" + have = getItemGuids(db.ListItems(ItemFilter{Search: &s7}, 10, true, false)) + if !reflect.DeepEqual(have, []string{"i1"}) { + t.Errorf("update trigger failed: expected [i1], got %v", have) + } + + // 6. 
Trigger: Delete + db.db.Exec("delete from items where guid = 'i1'") + have = getItemGuids(db.ListItems(ItemFilter{Search: &s7}, 10, true, false)) + if len(have) > 0 { + t.Errorf("delete trigger failed: found deleted item: %v", have) + } +} diff --git a/src/storage/migration.go b/src/storage/migration.go index 714f464..5099305 100644 --- a/src/storage/migration.go +++ b/src/storage/migration.go @@ -21,6 +21,7 @@ var migrations = []func(*sql.Tx) error{ m11_add_item_last_arrived, m12_remove_feed_sizes, m13_consolidate_feed_states, + m14_upgrade_fts5, } var maxVersion = int64(len(migrations)) @@ -382,3 +383,40 @@ func m13_consolidate_feed_states(tx *sql.Tx) error { _, err := tx.Exec(sql) return err } + +func m14_upgrade_fts5(tx *sql.Tx) error { + sql := ` + -- 1. Drop old FTS4 table and trigger + drop table if exists search; + drop trigger if exists del_item_search; + + -- 2. Remove search_rowid from items + drop index if exists idx_item_search_rowid; + alter table items drop column search_rowid; + + -- 3. Create FTS5 virtual table + create virtual table search using fts5( + title, content, + content='items', + content_rowid='id', + tokenize='unicode61' + ); + + -- 4. Create triggers for automatic FTS sync + create trigger items_ai after insert on items begin + insert into search(rowid, title, content) values (new.id, new.title, strip_html(new.content)); + end; + create trigger items_ad after delete on items begin + insert into search(search, rowid, title, content) values('delete', old.id, old.title, strip_html(old.content)); + end; + create trigger items_au after update on items begin + insert into search(search, rowid, title, content) values('delete', old.id, old.title, strip_html(old.content)); + insert into search(rowid, title, content) values (new.id, new.title, strip_html(new.content)); + end; + + -- 5. 
Populate FTS5 table with existing data + insert into search(rowid, title, content) select id, title, strip_html(content) from items; + ` + _, err := tx.Exec(sql) + return err +} diff --git a/src/storage/storage.go b/src/storage/storage.go index c933148..6b415a5 100644 --- a/src/storage/storage.go +++ b/src/storage/storage.go @@ -5,9 +5,18 @@ import ( "log" "strings" - _ "github.com/mattn/go-sqlite3" + "github.com/mattn/go-sqlite3" + "github.com/nkanaev/yarr/src/content/htmlutil" ) +func init() { + sql.Register("sqlite3_yarr", &sqlite3.SQLiteDriver{ + ConnectHook: func(conn *sqlite3.SQLiteConn) error { + return conn.RegisterFunc("strip_html", htmlutil.ExtractText, true) + }, + }) +} + type Storage struct { db *sql.DB } @@ -28,7 +37,7 @@ func New(path string) (*Storage, error) { path = path + "?" + params } - db, err := sql.Open("sqlite3", path) + db, err := sql.Open("sqlite3_yarr", path) if err != nil { return nil, err } diff --git a/src/worker/worker.go b/src/worker/worker.go index 355a72f..3e69a0f 100644 --- a/src/worker/worker.go +++ b/src/worker/worker.go @@ -118,7 +118,6 @@ func (w *Worker) refresher(feeds []storage.Feed) { w.db.CreateItems(items) } atomic.AddInt32(w.pending, -1) - w.db.SyncSearch() } close(srcqueue) close(dstqueue)