Search Postgresql Archives

Re: COLLATE

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



"Filip Rembiaâkowski" <plk.zuber@xxxxxxxxx> writes:

> let's assume that we keep Unicode text  data in the column.
> sometimes we want to sort it according to specific collation order.
> how can we force collation when running a query?
> ideal solution would be having SQL92 standard COLLATE clauses in
> SELECT statements. I heard it's work in progress, what's the status?
> 
> are there any "partial" solutions to the problem?

I don't know the status of the full COLLATE support.

But the closest partial solution suggested so far is the pg_xfrm function that
has been implemented and posted at least three times by three different
posters to the postgres mailing lists. In the interest of avoiding a fourth
independent implementation I'll attach the one I use below, it's not big.


/*
  * Joe Conway <mail@xxxxxxxxxxxxx>
  *
  * Copyright (c) 2004, Joseph E. Conway
  * ALL RIGHTS RESERVED
  *
  * Permission to use, copy, modify, and distribute this software and its
  * documentation for any purpose, without fee, and without a written agreement
  * is hereby granted, provided that the above copyright notice and this
  * paragraph and the following two paragraphs appear in all copies.
  *
  * IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY FOR
  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
  * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
  * DOCUMENTATION, EVEN IF THE AUTHOR OR DISTRIBUTORS HAVE BEEN ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  * THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
  * ON AN "AS IS" BASIS, AND THE AUTHOR AND DISTRIBUTORS HAS NO OBLIGATIONS TO
  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
  *
  */

/*
 * 
 * If your libc strxfrm() overflows the buffer provided ignoring the length
 * argument then add this define. It causes this function to do an extra
 * strxfrm() call on every execution to get the actual size of buffer needed.
 * 
 * #define DONT_TRUST_STRXFRM
*/

/*
 *
 * CREATE OR REPLACE function pg_strxfrm(text,text) RETURNS bytea 
 *     AS 'strxfrm2.so', 'pg_strxfrm' LANGUAGE c IMMUTABLE STRICT;
 * 
 * Usage: pg_strxfrm(string, locale)
 */

#include <setjmp.h>
#include <string.h>
#include "postgres.h"
#include "fmgr.h"
#include "tcop/tcopprot.h"
#include "utils/builtins.h"

#define GET_STR(textp) \
  DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
#define GET_BYTEA(str_) \
  DatumGetTextP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
#define MAX_BYTEA_LEN    0x3fffffff

/*
 * pg_strxfrm - Function to convert string similar to the strxfrm C
 * function using a specified locale.
 */

extern Datum pg_strxfrm(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(pg_strxfrm);

Datum
pg_strxfrm(PG_FUNCTION_ARGS)
{
	char       *str = GET_STR(PG_GETARG_TEXT_P(0));
	char       *localestr = GET_STR(PG_GETARG_TEXT_P(1));
	size_t      approx_trans_len;
	
	char       *trans;
	
	size_t      actual_trans_len;
	char       *oldlocale;
	char       *newlocale;
	sigjmp_buf  save_restart;

#ifndef DONT_TRUST_STRXFRM
	size_t      str_len = strlen(str);
	static unsigned guess = 1;

	approx_trans_len = guess + guess * str_len + 1;
	if (approx_trans_len > MAX_BYTEA_LEN)
		elog(ERROR, "source string too long to transform");
	trans = (char *) palloc(approx_trans_len);
#else
	approx_trans_len = 0;
	trans = NULL;
#endif

	oldlocale = setlocale(LC_COLLATE, NULL);
	if (!oldlocale)
		elog(ERROR, "setlocale failed to return a locale");
	oldlocale = pstrdup(oldlocale);

	/* catch elog while locale is set other than the default */
	memcpy(&save_restart, &Warn_restart, sizeof(save_restart));
	if (sigsetjmp(Warn_restart, 1) != 0)
	{
		memcpy(&Warn_restart, &save_restart, sizeof(Warn_restart));
		newlocale = setlocale(LC_COLLATE, oldlocale);
		if (!newlocale)
			elog(PANIC, "setlocale failed to reset locale: %s", localestr);
		siglongjmp(Warn_restart, 1);
	}

	newlocale = setlocale(LC_COLLATE, localestr);
	if (!newlocale)
		elog(ERROR, "setlocale failed to set a locale: %s", localestr);

	actual_trans_len = strxfrm(trans, str, approx_trans_len);
	/* if the buffer was not large enough, resize it and try again */
	if (actual_trans_len >= approx_trans_len)
	{
		approx_trans_len = actual_trans_len + 1;
		if (approx_trans_len > MAX_BYTEA_LEN)
			elog(ERROR, "source string too long to transform");

		if (trans)
			trans = (char *) repalloc(trans, approx_trans_len);
		else
			trans = (char *) palloc(approx_trans_len);

		actual_trans_len = strxfrm(trans, str, approx_trans_len);

#ifndef DONT_TRUST_STRXFRM
		while(actual_trans_len >= guess + guess * str_len)
			guess++;
		elog(INFO, "strxfrm seems to need %d*n+%d sized buffer", guess, guess + 1);
#endif

		/* if the buffer still not large enough, punt */
		if (actual_trans_len >= approx_trans_len)
			elog(ERROR, "strxfrm failed, buffer insufficient");
	}
	newlocale = setlocale(LC_COLLATE, oldlocale);
	if (!newlocale)
		elog(PANIC, "setlocale failed to reset locale: %s", localestr);

	/* restore normal error handling */
	memcpy(&Warn_restart, &save_restart, sizeof(Warn_restart));

	PG_RETURN_BYTEA_P(GET_BYTEA(trans));
}
SET search_path = public;

SET autocommit TO 'on';

CREATE OR REPLACE FUNCTION pg_strxfrm(text, text) RETURNS bytea
AS 'pg_strxfrm.so', 'pg_strxfrm'
LANGUAGE 'C' STRICT IMMUTABLE ;


-- 
greg

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Postgresql Jobs]     [Postgresql Admin]     [Postgresql Performance]     [Linux Clusters]     [PHP Home]     [PHP on Windows]     [Kernel Newbies]     [PHP Classes]     [PHP Books]     [PHP Databases]     [Postgresql & PHP]     [Yosemite]
  Powered by Linux