mirror of
https://github.com/nkanaev/yarr.git
synced 2026-06-09 18:03:19 +00:00
switch to fts5, rework search syncing
This commit is contained in:
2
makefile
2
makefile
@@ -1,7 +1,7 @@
|
||||
VERSION=2.6
|
||||
GITHASH=$(shell git rev-parse --short=8 HEAD)
|
||||
|
||||
GO_TAGS = sqlite_foreign_keys sqlite_json
|
||||
GO_TAGS = sqlite_foreign_keys sqlite_json sqlite_fts5
|
||||
GO_LDFLAGS = -s -w -X 'main.Version=$(VERSION)' -X 'main.GitHash=$(GITHASH)'
|
||||
|
||||
GO_FLAGS = -tags "$(GO_TAGS)" -ldflags="$(GO_LDFLAGS)"
|
||||
|
||||
@@ -257,7 +257,6 @@ func (s *Server) handleFeedList(c *router.Context) {
|
||||
items := worker.ConvertItems(result.Feed.Items, *feed)
|
||||
if len(items) > 0 {
|
||||
s.db.CreateItems(items)
|
||||
s.db.SyncSearch()
|
||||
}
|
||||
s.worker.FindFeedFavicon(*feed)
|
||||
|
||||
|
||||
@@ -9,8 +9,6 @@ import (
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/nkanaev/yarr/src/content/htmlutil"
|
||||
)
|
||||
|
||||
type ItemStatus int
|
||||
@@ -195,7 +193,7 @@ func listQueryPredicate(filter ItemFilter, newestFirst bool) (string, []any) {
|
||||
|
||||
cond = append(
|
||||
cond,
|
||||
"i.search_rowid in (select rowid from search where search match :search)",
|
||||
"i.id in (select rowid as id from search where search match :search)",
|
||||
)
|
||||
args = append(args, sql.Named("search", strings.Join(terms, " ")))
|
||||
}
|
||||
@@ -379,46 +377,6 @@ func (s *Storage) FeedStats() []FeedStat {
|
||||
return result
|
||||
}
|
||||
|
||||
func (s *Storage) SyncSearch() {
|
||||
rows, err := s.db.Query(`
|
||||
select id, title, content
|
||||
from items
|
||||
where search_rowid is null;
|
||||
`)
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
return
|
||||
}
|
||||
|
||||
items := make([]Item, 0)
|
||||
for rows.Next() {
|
||||
var item Item
|
||||
rows.Scan(&item.Id, &item.Title, &item.Content)
|
||||
items = append(items, item)
|
||||
}
|
||||
|
||||
for _, item := range items {
|
||||
result, err := s.db.Exec(`
|
||||
insert into search (title, description, content) values (:title, "", :content)`,
|
||||
sql.Named("title", item.Title),
|
||||
sql.Named("content", htmlutil.ExtractText(item.Content)),
|
||||
)
|
||||
if err != nil {
|
||||
log.Print(err)
|
||||
return
|
||||
}
|
||||
if numrows, err := result.RowsAffected(); err == nil && numrows == 1 {
|
||||
if rowId, err := result.LastInsertId(); err == nil {
|
||||
s.db.Exec(
|
||||
`update items set search_rowid = :search_rowid where id = :id`,
|
||||
sql.Named("search_rowid", rowId),
|
||||
sql.Named("id", item.Id),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
itemsKeepSize = 50
|
||||
itemsKeepDays = 90
|
||||
|
||||
@@ -212,7 +212,6 @@ func TestListItems(t *testing.T) {
|
||||
}
|
||||
|
||||
// filter by search
|
||||
db.SyncSearch()
|
||||
search1 := "title111"
|
||||
have = getItemGuids(db.ListItems(ItemFilter{Search: &search1}, 4, true, false))
|
||||
want = []string{"item111"}
|
||||
@@ -432,3 +431,85 @@ func TestCreateItemsLastArrived(t *testing.T) {
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestSearch(t *testing.T) {
|
||||
db := testDB()
|
||||
defer db.Close()
|
||||
feed := db.CreateFeed(CreateFeedParams{Title: "f", FeedLink: "http://f.xml"})
|
||||
|
||||
db.CreateItems([]Item{
|
||||
{
|
||||
GUID: "i1",
|
||||
FeedId: feed.Id,
|
||||
Title: "Hello World",
|
||||
Content: "This is a <b>test</b> of the <i>emergency</i> broadcast system.",
|
||||
},
|
||||
{
|
||||
GUID: "i2",
|
||||
FeedId: feed.Id,
|
||||
Title: "FTS5 Unicode",
|
||||
Content: "Unicode support with characters like: Привет, 世界, 🚀",
|
||||
},
|
||||
{
|
||||
GUID: "i3",
|
||||
FeedId: feed.Id,
|
||||
Title: "Hidden Tag",
|
||||
Content: `<div class="secret-class">Don't find me by my class name</div>`,
|
||||
},
|
||||
})
|
||||
|
||||
// 1. Basic search
|
||||
s1 := "emergency"
|
||||
have := getItemGuids(db.ListItems(ItemFilter{Search: &s1}, 10, true, false))
|
||||
if !reflect.DeepEqual(have, []string{"i1"}) {
|
||||
t.Errorf("basic search failed: expected [i1], got %v", have)
|
||||
}
|
||||
|
||||
// 2. HTML stripping: Should find text, but NOT the tags
|
||||
s2 := "test"
|
||||
have = getItemGuids(db.ListItems(ItemFilter{Search: &s2}, 10, true, false))
|
||||
if !reflect.DeepEqual(have, []string{"i1"}) {
|
||||
t.Errorf("html text search failed: expected [i1], got %v", have)
|
||||
}
|
||||
|
||||
s3 := "secret-class"
|
||||
have = getItemGuids(db.ListItems(ItemFilter{Search: &s3}, 10, true, false))
|
||||
if len(have) > 0 {
|
||||
t.Errorf("html tag search should have failed but found: %v", have)
|
||||
}
|
||||
|
||||
// 3. Multi-word (AND)
|
||||
s4 := "broadcast system"
|
||||
have = getItemGuids(db.ListItems(ItemFilter{Search: &s4}, 10, true, false))
|
||||
if !reflect.DeepEqual(have, []string{"i1"}) {
|
||||
t.Errorf("multi-word search failed: expected [i1], got %v", have)
|
||||
}
|
||||
|
||||
// 4. Unicode
|
||||
s5 := "Привет"
|
||||
have = getItemGuids(db.ListItems(ItemFilter{Search: &s5}, 10, true, false))
|
||||
if !reflect.DeepEqual(have, []string{"i2"}) {
|
||||
t.Errorf("unicode search failed: expected [i2], got %v", have)
|
||||
}
|
||||
|
||||
s6 := "世界"
|
||||
have = getItemGuids(db.ListItems(ItemFilter{Search: &s6}, 10, true, false))
|
||||
if !reflect.DeepEqual(have, []string{"i2"}) {
|
||||
t.Errorf("unicode search (CJK) failed: expected [i2], got %v", have)
|
||||
}
|
||||
|
||||
// 5. Trigger: Update
|
||||
db.db.Exec("update items set title = 'Updated Title' where guid = 'i1'")
|
||||
s7 := "Updated"
|
||||
have = getItemGuids(db.ListItems(ItemFilter{Search: &s7}, 10, true, false))
|
||||
if !reflect.DeepEqual(have, []string{"i1"}) {
|
||||
t.Errorf("update trigger failed: expected [i1], got %v", have)
|
||||
}
|
||||
|
||||
// 6. Trigger: Delete
|
||||
db.db.Exec("delete from items where guid = 'i1'")
|
||||
have = getItemGuids(db.ListItems(ItemFilter{Search: &s7}, 10, true, false))
|
||||
if len(have) > 0 {
|
||||
t.Errorf("delete trigger failed: found deleted item: %v", have)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ var migrations = []func(*sql.Tx) error{
|
||||
m11_add_item_last_arrived,
|
||||
m12_remove_feed_sizes,
|
||||
m13_consolidate_feed_states,
|
||||
m14_upgrade_fts5,
|
||||
}
|
||||
|
||||
var maxVersion = int64(len(migrations))
|
||||
@@ -382,3 +383,40 @@ func m13_consolidate_feed_states(tx *sql.Tx) error {
|
||||
_, err := tx.Exec(sql)
|
||||
return err
|
||||
}
|
||||
|
||||
// m14_upgrade_fts5 replaces the previous search table with an FTS5 index over
// items that is kept in sync by triggers.
//
// The statements run in order inside tx:
//  1. drop the old search table and its del_item_search trigger;
//  2. drop the search_rowid column (and its index) from items;
//  3. create the FTS5 table with items as its content table and items.id as
//     the rowid;
//  4. create insert/delete/update triggers that mirror items into the index;
//  5. index the rows that already exist.
//
// strip_html is not a built-in SQLite function; it has to be registered on
// the connection (the sqlite3_yarr driver does this), otherwise both this
// migration and the triggers it creates fail.
//
// The index is built from strip_html(content), not from the raw column, so
// each 'delete' command passes the same expression over the old row: FTS5
// needs the originally indexed values to remove an entry.
//
// The update trigger is limited to the columns the index depends on (id,
// title, content). Updates that touch only other columns leave the index
// entry valid and no longer pay for two strip_html calls and an index rewrite.
//
// NOTE(review): coalesce guards strip_html against NULL content, which a
// registered Go function taking a string is expected to reject. Confirm
// whether items.content can actually be NULL in existing databases.
//
// It returns the error from executing the script, if any.
func m14_upgrade_fts5(tx *sql.Tx) error {
	sql := `
		-- 1. Drop old FTS4 table and trigger
		drop table if exists search;
		drop trigger if exists del_item_search;

		-- 2. Remove search_rowid from items
		drop index if exists idx_item_search_rowid;
		alter table items drop column search_rowid;

		-- 3. Create FTS5 virtual table
		create virtual table search using fts5(
			title, content,
			content='items',
			content_rowid='id',
			tokenize='unicode61'
		);

		-- 4. Create triggers for automatic FTS sync
		create trigger items_ai after insert on items begin
			insert into search(rowid, title, content)
			values (new.id, new.title, strip_html(coalesce(new.content, '')));
		end;
		create trigger items_ad after delete on items begin
			insert into search(search, rowid, title, content)
			values ('delete', old.id, old.title, strip_html(coalesce(old.content, '')));
		end;
		create trigger items_au after update of id, title, content on items begin
			insert into search(search, rowid, title, content)
			values ('delete', old.id, old.title, strip_html(coalesce(old.content, '')));
			insert into search(rowid, title, content)
			values (new.id, new.title, strip_html(coalesce(new.content, '')));
		end;

		-- 5. Populate FTS5 table with existing data
		insert into search(rowid, title, content)
		select id, title, strip_html(coalesce(content, '')) from items;
	`
	_, err := tx.Exec(sql)
	return err
}
|
||||
|
||||
@@ -5,9 +5,18 @@ import (
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
"github.com/mattn/go-sqlite3"
|
||||
"github.com/nkanaev/yarr/src/content/htmlutil"
|
||||
)
|
||||
|
||||
func init() {
|
||||
sql.Register("sqlite3_yarr", &sqlite3.SQLiteDriver{
|
||||
ConnectHook: func(conn *sqlite3.SQLiteConn) error {
|
||||
return conn.RegisterFunc("strip_html", htmlutil.ExtractText, true)
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// Storage wraps the *sql.DB handle that the storage methods run their queries through.
type Storage struct {
|
||||
db *sql.DB
|
||||
}
|
||||
@@ -28,7 +37,7 @@ func New(path string) (*Storage, error) {
|
||||
path = path + "?" + params
|
||||
}
|
||||
|
||||
db, err := sql.Open("sqlite3", path)
|
||||
db, err := sql.Open("sqlite3_yarr", path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -118,7 +118,6 @@ func (w *Worker) refresher(feeds []storage.Feed) {
|
||||
w.db.CreateItems(items)
|
||||
}
|
||||
atomic.AddInt32(w.pending, -1)
|
||||
w.db.SyncSearch()
|
||||
}
|
||||
close(srcqueue)
|
||||
close(dstqueue)
|
||||
|
||||
Reference in New Issue
Block a user