switch to fts5, rework search syncing

This commit is contained in:
nkanaev
2026-06-03 13:56:02 +01:00
parent 2f263e9803
commit 138b5ad991
7 changed files with 133 additions and 49 deletions

View File

@@ -1,7 +1,7 @@
VERSION=2.6
GITHASH=$(shell git rev-parse --short=8 HEAD)
GO_TAGS = sqlite_foreign_keys sqlite_json
GO_TAGS = sqlite_foreign_keys sqlite_json sqlite_fts5
GO_LDFLAGS = -s -w -X 'main.Version=$(VERSION)' -X 'main.GitHash=$(GITHASH)'
GO_FLAGS = -tags "$(GO_TAGS)" -ldflags="$(GO_LDFLAGS)"

View File

@@ -257,7 +257,6 @@ func (s *Server) handleFeedList(c *router.Context) {
items := worker.ConvertItems(result.Feed.Items, *feed)
if len(items) > 0 {
s.db.CreateItems(items)
s.db.SyncSearch()
}
s.worker.FindFeedFavicon(*feed)

View File

@@ -9,8 +9,6 @@ import (
"sort"
"strings"
"time"
"github.com/nkanaev/yarr/src/content/htmlutil"
)
type ItemStatus int
@@ -195,7 +193,7 @@ func listQueryPredicate(filter ItemFilter, newestFirst bool) (string, []any) {
cond = append(
cond,
"i.search_rowid in (select rowid from search where search match :search)",
"i.id in (select rowid as id from search where search match :search)",
)
args = append(args, sql.Named("search", strings.Join(terms, " ")))
}
@@ -379,46 +377,6 @@ func (s *Storage) FeedStats() []FeedStat {
return result
}
// SyncSearch adds every item whose search_rowid is still null to the search
// table and records the resulting search rowid back on the item.
//
// The title is stored as-is; the content is passed through
// htmlutil.ExtractText before being indexed. Errors are logged rather than
// returned: a failed search insert stops the sync, while a failure affecting
// a single item (scan, rowid lookup, items update) is logged and that item is
// skipped so the remaining items are still processed.
func (s *Storage) SyncSearch() {
	rows, err := s.db.Query(`
select id, title, content
from items
where search_rowid is null;
`)
	if err != nil {
		log.Print(err)
		return
	}

	// Collect the pending items first so the read cursor is finished before
	// any write statement is issued.
	items := make([]Item, 0)
	for rows.Next() {
		var item Item
		if err := rows.Scan(&item.Id, &item.Title, &item.Content); err != nil {
			// Do not index a partially populated item.
			log.Print(err)
			continue
		}
		items = append(items, item)
	}
	if err := rows.Err(); err != nil {
		// Iteration stopped early; still index what was read successfully.
		log.Print(err)
	}
	if err := rows.Close(); err != nil {
		log.Print(err)
	}

	for _, item := range items {
		result, err := s.db.Exec(`
insert into search (title, description, content) values (:title, "", :content)`,
			sql.Named("title", item.Title),
			sql.Named("content", htmlutil.ExtractText(item.Content)),
		)
		if err != nil {
			log.Print(err)
			return
		}

		numrows, err := result.RowsAffected()
		if err != nil {
			log.Print(err)
			continue
		}
		if numrows != 1 {
			continue
		}

		rowId, err := result.LastInsertId()
		if err != nil {
			log.Print(err)
			continue
		}

		// Link the item to its search row so it is not picked up again.
		if _, err := s.db.Exec(
			`update items set search_rowid = :search_rowid where id = :id`,
			sql.Named("search_rowid", rowId),
			sql.Named("id", item.Id),
		); err != nil {
			log.Print(err)
		}
	}
}
var (
itemsKeepSize = 50
itemsKeepDays = 90

View File

@@ -212,7 +212,6 @@ func TestListItems(t *testing.T) {
}
// filter by search
db.SyncSearch()
search1 := "title111"
have = getItemGuids(db.ListItems(ItemFilter{Search: &search1}, 4, true, false))
want = []string{"item111"}
@@ -432,3 +431,85 @@ func TestCreateItemsLastArrived(t *testing.T) {
}
})
}
// TestSearch exercises full-text search through ListItems: plain terms,
// HTML handling (text inside tags is searchable, attribute values are not),
// multi-word queries, non-ASCII terms, and that updating or deleting an item
// row is reflected in subsequent search results.
func TestSearch(t *testing.T) {
	db := testDB()
	defer db.Close()

	feed := db.CreateFeed(CreateFeedParams{Title: "f", FeedLink: "http://f.xml"})
	db.CreateItems([]Item{
		{
			GUID:    "i1",
			FeedId:  feed.Id,
			Title:   "Hello World",
			Content: "This is a <b>test</b> of the <i>emergency</i> broadcast system.",
		},
		{
			GUID:    "i2",
			FeedId:  feed.Id,
			Title:   "FTS5 Unicode",
			Content: "Unicode support with characters like: Привет, 世界, 🚀",
		},
		{
			GUID:    "i3",
			FeedId:  feed.Id,
			Title:   "Hidden Tag",
			Content: `<div class="secret-class">Don't find me by my class name</div>`,
		},
	})

	// search returns the GUIDs of the items matching query.
	search := func(query string) []string {
		return getItemGuids(db.ListItems(ItemFilter{Search: &query}, 10, true, false))
	}

	// 1. Basic search
	have := search("emergency")
	if !reflect.DeepEqual(have, []string{"i1"}) {
		t.Errorf("basic search failed: expected [i1], got %v", have)
	}

	// 2. HTML stripping: Should find text, but NOT the tags
	have = search("test")
	if !reflect.DeepEqual(have, []string{"i1"}) {
		t.Errorf("html text search failed: expected [i1], got %v", have)
	}
	have = search("secret-class")
	if len(have) > 0 {
		t.Errorf("html tag search should have failed but found: %v", have)
	}

	// 3. Multi-word (AND)
	have = search("broadcast system")
	if !reflect.DeepEqual(have, []string{"i1"}) {
		t.Errorf("multi-word search failed: expected [i1], got %v", have)
	}

	// 4. Unicode
	have = search("Привет")
	if !reflect.DeepEqual(have, []string{"i2"}) {
		t.Errorf("unicode search failed: expected [i2], got %v", have)
	}
	have = search("世界")
	if !reflect.DeepEqual(have, []string{"i2"}) {
		t.Errorf("unicode search (CJK) failed: expected [i2], got %v", have)
	}

	// 5. Trigger: Update
	// Fail fast if the statement itself errors; otherwise the assertion below
	// would only report a misleading search mismatch.
	if _, err := db.db.Exec("update items set title = 'Updated Title' where guid = 'i1'"); err != nil {
		t.Fatalf("updating item i1: %v", err)
	}
	have = search("Updated")
	if !reflect.DeepEqual(have, []string{"i1"}) {
		t.Errorf("update trigger failed: expected [i1], got %v", have)
	}

	// 6. Trigger: Delete
	if _, err := db.db.Exec("delete from items where guid = 'i1'"); err != nil {
		t.Fatalf("deleting item i1: %v", err)
	}
	have = search("Updated")
	if len(have) > 0 {
		t.Errorf("delete trigger failed: found deleted item: %v", have)
	}
}

View File

@@ -21,6 +21,7 @@ var migrations = []func(*sql.Tx) error{
m11_add_item_last_arrived,
m12_remove_feed_sizes,
m13_consolidate_feed_states,
m14_upgrade_fts5,
}
var maxVersion = int64(len(migrations))
@@ -382,3 +383,40 @@ func m13_consolidate_feed_states(tx *sql.Tx) error {
_, err := tx.Exec(sql)
return err
}
// m14_upgrade_fts5 replaces the previous search table with an FTS5 virtual
// table that is keyed by items.id and kept in sync by triggers on items.
//
// It drops the old search table, its delete trigger, and the
// items.search_rowid column and index; creates the FTS5 table over title and
// content; installs insert/delete/update triggers; and finally indexes all
// existing items. Content is indexed through the strip_html SQL function, so
// the migration must run on a connection where that function is registered.
//
// The update trigger is restricted to updates that name id, title or content
// in their SET clause. Updates that only touch other columns cannot change
// what is indexed, and re-indexing on them would run strip_html twice per
// affected row for no effect.
func m14_upgrade_fts5(tx *sql.Tx) error {
	sql := `
-- 1. Drop old FTS4 table and trigger
drop table if exists search;
drop trigger if exists del_item_search;
-- 2. Remove search_rowid from items
drop index if exists idx_item_search_rowid;
alter table items drop column search_rowid;
-- 3. Create FTS5 virtual table
create virtual table search using fts5(
title, content,
content='items',
content_rowid='id',
tokenize='unicode61'
);
-- 4. Create triggers for automatic FTS sync
create trigger items_ai after insert on items begin
insert into search(rowid, title, content) values (new.id, new.title, strip_html(new.content));
end;
create trigger items_ad after delete on items begin
insert into search(search, rowid, title, content) values('delete', old.id, old.title, strip_html(old.content));
end;
create trigger items_au after update of id, title, content on items begin
insert into search(search, rowid, title, content) values('delete', old.id, old.title, strip_html(old.content));
insert into search(rowid, title, content) values (new.id, new.title, strip_html(new.content));
end;
-- 5. Populate FTS5 table with existing data
insert into search(rowid, title, content) select id, title, strip_html(content) from items;
`
	_, err := tx.Exec(sql)
	return err
}

View File

@@ -5,9 +5,18 @@ import (
"log"
"strings"
_ "github.com/mattn/go-sqlite3"
"github.com/mattn/go-sqlite3"
"github.com/nkanaev/yarr/src/content/htmlutil"
)
// init registers a database/sql driver named "sqlite3_yarr". Every
// connection opened through it gets a strip_html SQL function backed by
// htmlutil.ExtractText, registered via the driver's connect hook.
func init() {
	// Runs once per new connection; the final argument marks the function
	// as pure.
	registerStripHTML := func(conn *sqlite3.SQLiteConn) error {
		return conn.RegisterFunc("strip_html", htmlutil.ExtractText, true)
	}

	driver := &sqlite3.SQLiteDriver{ConnectHook: registerStripHTML}
	sql.Register("sqlite3_yarr", driver)
}
// Storage wraps the database handle used by the storage layer.
type Storage struct {
// db is the underlying database/sql connection pool.
db *sql.DB
}
@@ -28,7 +37,7 @@ func New(path string) (*Storage, error) {
path = path + "?" + params
}
db, err := sql.Open("sqlite3", path)
db, err := sql.Open("sqlite3_yarr", path)
if err != nil {
return nil, err
}

View File

@@ -118,7 +118,6 @@ func (w *Worker) refresher(feeds []storage.Feed) {
w.db.CreateItems(items)
}
atomic.AddInt32(w.pending, -1)
w.db.SyncSearch()
}
close(srcqueue)
close(dstqueue)