Artur, thanks for your help. I managed to add the new strategy to the index. Hurray! Along the way I also discovered a bug, which I reported via the form.
I still have a few questions:
1. Naming - pg_trgm_match, match, threshold, trgm_check_match, ThresholdStrategyNumber - are these good names?
2. I made trgm_check_match IMMUTABLE. Are there any other modifiers that should be there?
3. I defined % (text, pg_trgm_match) but didn't provide a commutator and other helper procedures. Which of them should I implement?
4. Can I obtain query and nlimit with less code?
5. The attached patch replaced "res = (*(int *) &tmpsml == *(int *) &nlimit || tmpsml > nlimit);" with "res = (tmpsml >= nlimit);" to fix the bug on my machine. I'm not sure whether that's the long-term fix we want to have. It's just there to help me make progress with trigrams.
Thanks for your help.
Cheers
Greg
ᐧ
diff --git a/contrib/pg_trgm/pg_trgm--1.3.sql b/contrib/pg_trgm/pg_trgm--1.3.sql index b279f7d..faa1fce 100644 --- a/contrib/pg_trgm/pg_trgm--1.3.sql +++ b/contrib/pg_trgm/pg_trgm--1.3.sql @@ -3,6 +3,8 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION pg_trgm" to load this file. \quit +CREATE TYPE pg_trgm_match AS (match TEXT, threshold REAL); + -- Deprecated function CREATE FUNCTION set_limit(float4) RETURNS float4 @@ -108,6 +110,18 @@ CREATE OPERATOR <->> ( COMMUTATOR = '<<->' ); +CREATE OR REPLACE FUNCTION trgm_check_match(string TEXT, match pg_trgm_match) RETURNS bool AS $$ +BEGIN + RETURN similarity(match.match, string) >= match.threshold; +END; +$$ LANGUAGE plpgsql IMMUTABLE; + +CREATE OPERATOR %( + leftarg = text, + rightarg = pg_trgm_match, + procedure = trgm_check_match +); + -- gist key CREATE FUNCTION gtrgm_in(cstring) RETURNS gtrgm @@ -126,7 +140,7 @@ CREATE TYPE gtrgm ( ); -- support functions for gist -CREATE FUNCTION gtrgm_consistent(internal,text,smallint,oid,internal) +CREATE FUNCTION gtrgm_consistent(internal,anynonarray,smallint,oid,internal) RETURNS bool AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; @@ -171,7 +185,7 @@ CREATE OPERATOR CLASS gist_trgm_ops FOR TYPE text USING gist AS OPERATOR 1 % (text, text), - FUNCTION 1 gtrgm_consistent (internal, text, smallint, oid, internal), + FUNCTION 1 gtrgm_consistent (internal, anynonarray, smallint, oid, internal), FUNCTION 2 gtrgm_union (internal, internal), FUNCTION 3 gtrgm_compress (internal), FUNCTION 4 gtrgm_decompress (internal), @@ -252,3 +266,6 @@ LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD OPERATOR 7 %> (text, text), FUNCTION 6 (text,text) gin_trgm_triconsistent (internal, int2, text, int4, internal, internal, internal); + +ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD + OPERATOR 9 % (text, pg_trgm_match); diff --git a/contrib/pg_trgm/trgm.h b/contrib/pg_trgm/trgm.h 
index 8cd88e7..f2b6008 100644 --- a/contrib/pg_trgm/trgm.h +++ b/contrib/pg_trgm/trgm.h @@ -34,6 +34,7 @@ #define RegExpICaseStrategyNumber 6 #define WordSimilarityStrategyNumber 7 #define WordDistanceStrategyNumber 8 +#define ThresholdStrategyNumber 9 typedef char trgm[3]; diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c index 3a5aff9..3884b13 100644 --- a/contrib/pg_trgm/trgm_gist.c +++ b/contrib/pg_trgm/trgm_gist.c @@ -5,7 +5,10 @@ #include "trgm.h" +#include "access/htup.h" +#include "access/htup_details.h" #include "access/stratnum.h" +#include "utils/typcache.h" #include "fmgr.h" @@ -181,7 +184,7 @@ Datum gtrgm_consistent(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); - text *query = PG_GETARG_TEXT_P(1); + text *query; StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); /* Oid subtype = PG_GETARG_OID(3); */ @@ -189,10 +192,43 @@ gtrgm_consistent(PG_FUNCTION_ARGS) TRGM *key = (TRGM *) DatumGetPointer(entry->key); TRGM *qtrg; bool res; - Size querysize = VARSIZE(query); + Size querysize; gtrgm_consistent_cache *cache; double nlimit; + HeapTupleHeader query_match; + Oid tupType; + int32 tupTypmod; + TupleDesc tupdesc; + HeapTupleData tuple; + bool isnull; + + if (strategy == ThresholdStrategyNumber) + { + query_match = PG_GETARG_HEAPTUPLEHEADER(1); + tupType = HeapTupleHeaderGetTypeId(query_match); + tupTypmod = HeapTupleHeaderGetTypMod(query_match); + tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); + + tuple.t_len = HeapTupleHeaderGetDatumLength(query_match); + ItemPointerSetInvalid(&(tuple.t_self)); + tuple.t_tableOid = InvalidOid; + tuple.t_data = query_match; + + query = DatumGetTextP(fastgetattr(&tuple, 1, tupdesc, &isnull)); + querysize = VARSIZE(query); + nlimit = DatumGetFloat4(fastgetattr(&tuple, 2, tupdesc, &isnull)); + + ReleaseTupleDesc(tupdesc); + } + else + { + query = PG_GETARG_TEXT_P(1); + querysize = VARSIZE(query); + nlimit = (strategy == SimilarityStrategyNumber) ? 
+ similarity_threshold : word_similarity_threshold; + } + /* * We keep the extracted trigrams in cache, because trigram extraction is * relatively CPU-expensive. When trying to reuse a cached value, check @@ -220,6 +256,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS) { case SimilarityStrategyNumber: case WordSimilarityStrategyNumber: + case ThresholdStrategyNumber: qtrg = generate_trgm(VARDATA(query), querysize - VARHDRSZ); break; @@ -289,10 +326,9 @@ gtrgm_consistent(PG_FUNCTION_ARGS) { case SimilarityStrategyNumber: case WordSimilarityStrategyNumber: + case ThresholdStrategyNumber: /* Similarity search is exact. Word similarity search is inexact */ *recheck = (strategy == WordSimilarityStrategyNumber); - nlimit = (strategy == SimilarityStrategyNumber) ? - similarity_threshold : word_similarity_threshold; if (GIST_LEAF(entry)) { /* all leafs contains orig trgm */ @@ -305,7 +341,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS) float4 volatile tmpsml = cnt_sml(qtrg, key, *recheck); /* strange bug at freebsd 5.2.1 and gcc 3.3.3 */ - res = (*(int *) &tmpsml == *(int *) &nlimit || tmpsml > nlimit); + res = (tmpsml >= nlimit); } else if (ISALLTRUE(key)) { /* non-leaf contains signature */ @@ -474,6 +510,7 @@ gtrgm_distance(PG_FUNCTION_ARGS) { case DistanceStrategyNumber: case WordDistanceStrategyNumber: + case ThresholdStrategyNumber: *recheck = strategy == WordDistanceStrategyNumber; if (GIST_LEAF(entry)) { /* all leafs contains orig trgm */
-- Sent via pgsql-general mailing list (pgsql-general@xxxxxxxxxxxxxx) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-general