diff --git a/makefile b/makefile
index 2448b21..d4892b4 100644
--- a/makefile
+++ b/makefile
@@ -1,7 +1,7 @@
VERSION=2.6
GITHASH=$(shell git rev-parse --short=8 HEAD)
-GO_TAGS = sqlite_foreign_keys sqlite_json
+GO_TAGS = sqlite_foreign_keys sqlite_json sqlite_fts5
GO_LDFLAGS = -s -w -X 'main.Version=$(VERSION)' -X 'main.GitHash=$(GITHASH)'
GO_FLAGS = -tags "$(GO_TAGS)" -ldflags="$(GO_LDFLAGS)"
diff --git a/src/server/routes.go b/src/server/routes.go
index 985da91..764cbd8 100644
--- a/src/server/routes.go
+++ b/src/server/routes.go
@@ -257,7 +257,6 @@ func (s *Server) handleFeedList(c *router.Context) {
items := worker.ConvertItems(result.Feed.Items, *feed)
if len(items) > 0 {
s.db.CreateItems(items)
- s.db.SyncSearch()
}
s.worker.FindFeedFavicon(*feed)
diff --git a/src/storage/item.go b/src/storage/item.go
index 282e413..71b290f 100644
--- a/src/storage/item.go
+++ b/src/storage/item.go
@@ -9,8 +9,6 @@ import (
"sort"
"strings"
"time"
-
- "github.com/nkanaev/yarr/src/content/htmlutil"
)
type ItemStatus int
@@ -195,7 +193,7 @@ func listQueryPredicate(filter ItemFilter, newestFirst bool) (string, []any) {
cond = append(
cond,
- "i.search_rowid in (select rowid from search where search match :search)",
+ "i.id in (select rowid as id from search where search match :search)",
)
args = append(args, sql.Named("search", strings.Join(terms, " ")))
}
@@ -379,46 +377,6 @@ func (s *Storage) FeedStats() []FeedStat {
return result
}
-func (s *Storage) SyncSearch() {
- rows, err := s.db.Query(`
- select id, title, content
- from items
- where search_rowid is null;
- `)
- if err != nil {
- log.Print(err)
- return
- }
-
- items := make([]Item, 0)
- for rows.Next() {
- var item Item
- rows.Scan(&item.Id, &item.Title, &item.Content)
- items = append(items, item)
- }
-
- for _, item := range items {
- result, err := s.db.Exec(`
- insert into search (title, description, content) values (:title, "", :content)`,
- sql.Named("title", item.Title),
- sql.Named("content", htmlutil.ExtractText(item.Content)),
- )
- if err != nil {
- log.Print(err)
- return
- }
- if numrows, err := result.RowsAffected(); err == nil && numrows == 1 {
- if rowId, err := result.LastInsertId(); err == nil {
- s.db.Exec(
- `update items set search_rowid = :search_rowid where id = :id`,
- sql.Named("search_rowid", rowId),
- sql.Named("id", item.Id),
- )
- }
- }
- }
-}
-
var (
itemsKeepSize = 50
itemsKeepDays = 90
diff --git a/src/storage/item_test.go b/src/storage/item_test.go
index 7d76c20..9f3a8f0 100644
--- a/src/storage/item_test.go
+++ b/src/storage/item_test.go
@@ -212,7 +212,6 @@ func TestListItems(t *testing.T) {
}
// filter by search
- db.SyncSearch()
search1 := "title111"
have = getItemGuids(db.ListItems(ItemFilter{Search: &search1}, 4, true, false))
want = []string{"item111"}
@@ -432,3 +431,80 @@ func TestCreateItemsLastArrived(t *testing.T) {
}
})
}
+
+// TestSearch checks full-text search through ListItems: plain, multi-word and
+// Unicode queries, exclusion of HTML markup from the index, and that the
+// index follows updates and deletes made directly on the items table.
+func TestSearch(t *testing.T) {
+	db := testDB()
+	defer db.Close()
+	feed := db.CreateFeed(CreateFeedParams{Title: "f", FeedLink: "http://f.xml"})
+
+	db.CreateItems([]Item{
+		{
+			GUID:    "i1",
+			FeedId:  feed.Id,
+			Title:   "Hello World",
+			Content: "This is a test of the emergency broadcast system.",
+		},
+		{
+			GUID:    "i2",
+			FeedId:  feed.Id,
+			Title:   "FTS5 Unicode",
+			Content: "Unicode support with characters like: Привет, 世界, 🚀",
+		},
+		{
+			GUID:    "i3",
+			FeedId:  feed.Id,
+			Title:   "Hidden Tag",
+			Content: `<div class="secret-class">Don't find me by my class name</div>`,
+		},
+	})
+
+	// expect runs query through ListItems and compares the returned GUIDs
+	// with want; calling it without want asserts that nothing matches.
+	expect := func(name, query string, want ...string) {
+		t.Helper()
+		have := getItemGuids(db.ListItems(ItemFilter{Search: &query}, 10, true, false))
+		if len(have) == 0 && len(want) == 0 {
+			return
+		}
+		if !reflect.DeepEqual(have, want) {
+			t.Errorf("%s: expected %v, got %v", name, want, have)
+		}
+	}
+	// mustExec aborts the test when a raw statement fails, so a broken
+	// statement is not misreported as a trigger problem.
+	mustExec := func(stmt string) {
+		t.Helper()
+		if _, err := db.db.Exec(stmt); err != nil {
+			t.Fatalf("exec %q: %v", stmt, err)
+		}
+	}
+
+	// 1. Basic search
+	expect("basic search", "emergency", "i1")
+	expect("body text search", "test", "i1")
+
+	// 2. HTML stripping: text inside markup is searchable, attribute values are not.
+	expect("html text search", "find", "i3")
+	// A single plain word is used here on purpose: NOTE(review) a hyphenated
+	// query such as "secret-class" may be rejected by the FTS5 query parser
+	// unless it is quoted, and a failed query would look like "no match".
+	expect("html attribute search", "secret")
+
+	// 3. Multi-word (AND)
+	expect("multi-word search", "broadcast system", "i1")
+
+	// 4. Unicode
+	expect("unicode search", "Привет", "i2")
+	expect("unicode search (CJK)", "世界", "i2")
+
+	// 5. Trigger: Update
+	mustExec("update items set title = 'Updated Title' where guid = 'i1'")
+	expect("update trigger", "Updated", "i1")
+
+	// 6. Trigger: Delete
+	mustExec("delete from items where guid = 'i1'")
+	expect("delete trigger", "Updated")
+}
diff --git a/src/storage/migration.go b/src/storage/migration.go
index 714f464..5099305 100644
--- a/src/storage/migration.go
+++ b/src/storage/migration.go
@@ -21,6 +21,7 @@ var migrations = []func(*sql.Tx) error{
m11_add_item_last_arrived,
m12_remove_feed_sizes,
m13_consolidate_feed_states,
+ m14_upgrade_fts5,
}
var maxVersion = int64(len(migrations))
@@ -382,3 +383,64 @@ func m13_consolidate_feed_states(tx *sql.Tx) error {
_, err := tx.Exec(sql)
return err
}
+
+// m14_upgrade_fts5 replaces the previous search table with an FTS5 index that
+// triggers on the items table keep up to date.
+//
+// It drops the old search table, the del_item_search trigger and the
+// items.search_rowid column together with its index, creates search as an
+// FTS5 external-content table keyed by items.id, installs insert, delete and
+// update triggers, and finally indexes the rows already present in items.
+//
+// Every statement that indexes content calls the SQL function strip_html, so
+// the connection running this migration must have it registered. The error
+// from tx.Exec is returned unchanged.
+func m14_upgrade_fts5(tx *sql.Tx) error {
+	sql := `
+		-- 1. Drop old FTS4 table and trigger
+		drop table if exists search;
+		drop trigger if exists del_item_search;
+
+		-- 2. Remove search_rowid from items
+		--    (alter table ... drop column needs SQLite 3.35.0 or newer)
+		drop index if exists idx_item_search_rowid;
+		alter table items drop column search_rowid;
+
+		-- 3. Create FTS5 virtual table
+		create virtual table search using fts5(
+			title, content,
+			content='items',
+			content_rowid='id',
+			tokenize='unicode61'
+		);
+
+		-- 4. Create triggers for automatic FTS sync.
+		--    The index stores strip_html(content), not the raw column, so each
+		--    'delete' command passes the same stripped value that was indexed.
+		--    coalesce() keeps a NULL content away from strip_html
+		--    (NOTE(review): the Go callback takes a string and is presumed to
+		--    reject NULL arguments; confirm against go-sqlite3).
+		create trigger items_ai after insert on items begin
+			insert into search(rowid, title, content)
+				values (new.id, new.title, strip_html(coalesce(new.content, '')));
+		end;
+		create trigger items_ad after delete on items begin
+			insert into search(search, rowid, title, content)
+				values ('delete', old.id, old.title, strip_html(coalesce(old.content, '')));
+		end;
+		--    Re-index only when an indexed column is assigned; updates of other
+		--    columns leave the index untouched.
+		create trigger items_au after update of id, title, content on items begin
+			insert into search(search, rowid, title, content)
+				values ('delete', old.id, old.title, strip_html(coalesce(old.content, '')));
+			insert into search(rowid, title, content)
+				values (new.id, new.title, strip_html(coalesce(new.content, '')));
+		end;
+
+		-- 5. Populate FTS5 table with existing data
+		insert into search(rowid, title, content)
+			select id, title, strip_html(coalesce(content, '')) from items;
+	`
+	_, err := tx.Exec(sql)
+	return err
+}
diff --git a/src/storage/storage.go b/src/storage/storage.go
index c933148..6b415a5 100644
--- a/src/storage/storage.go
+++ b/src/storage/storage.go
@@ -5,9 +5,18 @@ import (
"log"
"strings"
- _ "github.com/mattn/go-sqlite3"
+ "github.com/mattn/go-sqlite3"
+ "github.com/nkanaev/yarr/src/content/htmlutil"
)
+// init registers the "sqlite3_yarr" driver; its connections provide a strip_html SQL function.
+func init() {
+	registerStripHTML := func(conn *sqlite3.SQLiteConn) error {
+		return conn.RegisterFunc("strip_html", htmlutil.ExtractText, true) // true marks the function as pure
+	}
+	sql.Register("sqlite3_yarr", &sqlite3.SQLiteDriver{ConnectHook: registerStripHTML})
+}
+
type Storage struct {
db *sql.DB
}
@@ -28,7 +37,7 @@ func New(path string) (*Storage, error) {
path = path + "?" + params
}
- db, err := sql.Open("sqlite3", path)
+ db, err := sql.Open("sqlite3_yarr", path)
if err != nil {
return nil, err
}
diff --git a/src/worker/worker.go b/src/worker/worker.go
index 355a72f..3e69a0f 100644
--- a/src/worker/worker.go
+++ b/src/worker/worker.go
@@ -118,7 +118,6 @@ func (w *Worker) refresher(feeds []storage.Feed) {
w.db.CreateItems(items)
}
atomic.AddInt32(w.pending, -1)
- w.db.SyncSearch()
}
close(srcqueue)
close(dstqueue)