"Filip Rembiaâkowski" <plk.zuber@xxxxxxxxx> writes: > let's assume that we keep Unicode text data in the column. > sometimes we want to sort it according to specific collation order. > how can we force collation when running a query? > ideal solution would be having SQL92 standard COLLATE clauses in > SELECT statements. I heard it's work in progress, what's the status? > > are there any "partial" solutions to the problem? I don't know the status of the full COLLATE support. But the closest partial solution suggested so far is the pg_xfrm function that has been implemented and posted at least three times by three different posters to the postgres mailing lists. In the interest of avoiding a fourth independent implementation I'll attach the one I use below, it's not big.
/* * Joe Conway <mail@xxxxxxxxxxxxx> * * Copyright (c) 2004, Joseph E. Conway * ALL RIGHTS RESERVED * * Permission to use, copy, modify, and distribute this software and its * documentation for any purpose, without fee, and without a written agreement * is hereby granted, provided that the above copyright notice and this * paragraph and the following two paragraphs appear in all copies. * * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY FOR * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS * DOCUMENTATION, EVEN IF THE AUTHOR OR DISTRIBUTORS HAVE BEEN ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS * ON AN "AS IS" BASIS, AND THE AUTHOR AND DISTRIBUTORS HAS NO OBLIGATIONS TO * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. * */ /* * * If your libc strxfrm() overflows the buffer provided ignoring the length * argument then add this define. It causes this function to do an extra * strxfrm() call on every execution to get the actual size of buffer needed. * * #define DONT_TRUST_STRXFRM */ /* * * CREATE OR REPLACE function pg_strxfrm(text,text) RETURNS bytea * AS 'strxfrm2.so', 'pg_strxfrm' LANGUAGE c IMMUTABLE STRICT; * * Usage: pg_strxfrm(string, locale) */ #include <setjmp.h> #include <string.h> #include "postgres.h" #include "fmgr.h" #include "tcop/tcopprot.h" #include "utils/builtins.h" #define GET_STR(textp) \ DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp))) #define GET_BYTEA(str_) \ DatumGetTextP(DirectFunctionCall1(byteain, CStringGetDatum(str_))) #define MAX_BYTEA_LEN 0x3fffffff /* * pg_strxfrm - Function to convert string similar to the strxfrm C * function using a specified locale. */ extern Datum pg_strxfrm(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(pg_strxfrm); Datum pg_strxfrm(PG_FUNCTION_ARGS) { char *str = GET_STR(PG_GETARG_TEXT_P(0)); char *localestr = GET_STR(PG_GETARG_TEXT_P(1)); size_t approx_trans_len; char *trans; size_t actual_trans_len; char *oldlocale; char *newlocale; sigjmp_buf save_restart; #ifndef DONT_TRUST_STRXFRM size_t str_len = strlen(str); static unsigned guess = 1; approx_trans_len = guess + guess * str_len + 1; if (approx_trans_len > MAX_BYTEA_LEN) elog(ERROR, "source string too long to transform"); trans = (char *) palloc(approx_trans_len); #else approx_trans_len = 0; trans = NULL; #endif oldlocale = setlocale(LC_COLLATE, NULL); if (!oldlocale) elog(ERROR, "setlocale failed to return a locale"); oldlocale = pstrdup(oldlocale); /* catch elog while locale is set other than the default */ memcpy(&save_restart, &Warn_restart, sizeof(save_restart)); if (sigsetjmp(Warn_restart, 1) != 0) { memcpy(&Warn_restart, &save_restart, sizeof(Warn_restart)); newlocale = setlocale(LC_COLLATE, oldlocale); if (!newlocale) elog(PANIC, "setlocale failed to reset locale: %s", localestr); siglongjmp(Warn_restart, 1); } newlocale = setlocale(LC_COLLATE, localestr); if (!newlocale) elog(ERROR, "setlocale failed to set a locale: %s", localestr); actual_trans_len = strxfrm(trans, str, approx_trans_len); /* if the buffer was not large enough, resize it and try again */ if (actual_trans_len >= approx_trans_len) { approx_trans_len = actual_trans_len + 1; if (approx_trans_len > MAX_BYTEA_LEN) elog(ERROR, "source string too long to transform"); if (trans) trans = (char *) repalloc(trans, approx_trans_len); else trans = (char *) palloc(approx_trans_len); actual_trans_len = strxfrm(trans, str, approx_trans_len); #ifndef DONT_TRUST_STRXFRM while(actual_trans_len >= guess + guess * str_len) guess++; elog(INFO, "strxfrm seems to need %d*n+%d sized buffer", guess, guess + 1); #endif /* if the buffer still not large enough, punt */ if (actual_trans_len >= approx_trans_len) elog(ERROR, "strxfrm failed, buffer insufficient"); } newlocale = setlocale(LC_COLLATE, oldlocale); if (!newlocale) elog(PANIC, "setlocale failed to reset locale: %s", localestr); /* restore normal error handling */ memcpy(&Warn_restart, &save_restart, sizeof(Warn_restart)); PG_RETURN_BYTEA_P(GET_BYTEA(trans)); }
SET search_path = public; SET autocommit TO 'on'; CREATE OR REPLACE FUNCTION pg_strxfrm(text, text) RETURNS bytea AS 'pg_strxfrm.so', 'pg_strxfrm' LANGUAGE 'C' STRICT IMMUTABLE ;
-- greg