Full-Text Search

DB9 supports full-text search with language-specific tokenizers.

Available Tokenizers

Tokenizer	Aliases	Description
`jieba`	`chinese`, `zhparser`	Chinese word segmentation (jieba-rs)
`chinese_ngram`	`zhparser_ngram`	Chinese + bigram overlay for multi-char words
`simple`	-	Whitespace tokenizer for English/Latin

Chinese Text Search (jieba)

-- Build a GIN index over the jieba-tokenized form of documents.content
CREATE INDEX idx_content_fts
    ON documents
    USING gin (to_tsvector('jieba', content));

-- Return documents whose jieba tokens match the search text.
-- The WHERE clause uses the same to_tsvector('jieba', content) expression as the index.
SELECT *
FROM documents
WHERE to_tsvector('jieba', content)
      @@ plainto_tsquery('jieba', '关键词');

English Text Search

-- Create index
-- Uses its own name: idx_content_fts is already taken by the jieba index,
-- and index names must be unique, so reusing it would make this statement fail.
CREATE INDEX idx_content_fts_simple ON documents
USING gin(to_tsvector('simple', content));

-- Search
-- The WHERE clause uses the same to_tsvector('simple', content) expression as the index.
SELECT * FROM documents
WHERE to_tsvector('simple', content) @@ to_tsquery('simple', 'keyword');

Ranking Results

-- Top 10 documents for a search term, best match first.
-- The tsquery is built once in FROM and reused for both filtering and scoring,
-- so the search text only has to be written in one place.
SELECT
    d.content,
    ts_rank(to_tsvector('jieba', d.content), q) AS rank
FROM documents AS d
CROSS JOIN plainto_tsquery('jieba', '搜索词') AS q
WHERE to_tsvector('jieba', d.content) @@ q
ORDER BY rank DESC
LIMIT 10;

Query Types

Function	Description	Example
`plainto_tsquery`	Plain text; all terms must match (AND), no operators	`plainto_tsquery('jieba', '人工智能')`
`to_tsquery`	Boolean operators	`to_tsquery('simple', 'cat & dog')`
`phraseto_tsquery`	Exact phrase	`phraseto_tsquery('simple', 'hello world')`
`websearch_to_tsquery`	Web-search syntax (quoted phrases, `-` to exclude, `or`)	`websearch_to_tsquery('simple', '"exact" -exclude')`

Boolean Search

-- These examples filter on to_tsvector('simple', content), the same expression
-- used by the 'simple' GIN index on documents, so they run against the
-- documents table as-is (no precomputed tsvector column required).

-- AND: both terms must match
SELECT * FROM documents
WHERE to_tsvector('simple', content) @@ to_tsquery('simple', 'database & performance');

-- OR: either term matches
SELECT * FROM documents
WHERE to_tsvector('simple', content) @@ to_tsquery('simple', 'postgres | mysql');

-- NOT: exclude term
SELECT * FROM documents
WHERE to_tsvector('simple', content) @@ to_tsquery('simple', 'database & !oracle');

-- Prefix matching: matches any token starting with "data"
SELECT * FROM documents
WHERE to_tsvector('simple', content) @@ to_tsquery('simple', 'data:*');

Highlight Search Results

-- Return a snippet of each matching document with the matched terms wrapped
-- in <b>…</b>; the snippet is limited by MaxWords=50.
SELECT
    ts_headline(
        'jieba',
        content,
        plainto_tsquery('jieba', '数据库'),
        'StartSel=<b>, StopSel=</b>, MaxWords=50'
    ) AS highlighted
FROM documents
WHERE to_tsvector('jieba', content)
      @@ plainto_tsquery('jieba', '数据库');

Search with Weights

-- Prioritize title matches over body
ALTER TABLE articles ADD COLUMN tsv tsvector;

-- Backfill every row (no WHERE clause on purpose).
-- coalesce() keeps a NULL title or body from turning the whole concatenated
-- tsvector into NULL, which would make the row unsearchable.
-- Weight 'A' marks title tokens, weight 'B' marks body tokens.
UPDATE articles SET tsv =
  setweight(to_tsvector('jieba', coalesce(title, '')), 'A') ||
  setweight(to_tsvector('jieba', coalesce(body, '')), 'B');

-- Rank articles against the precomputed weighted tsvector column.
-- The tsquery is built once and joined to every article row.
SELECT
    a.title,
    ts_rank(a.tsv, q) AS rank
FROM articles AS a
CROSS JOIN plainto_tsquery('jieba', '搜索词') AS q
WHERE a.tsv @@ q
ORDER BY rank DESC;