[wp-trac] [WordPress Trac] #16530: Implement locale based sorting

WordPress Trac wp-trac at lists.automattic.com
Sat Feb 12 04:50:50 UTC 2011


#16530: Implement locale based sorting
-------------------------+------------------------------
 Reporter:  cyberskull   |       Owner:
     Type:  enhancement  |      Status:  new
 Priority:  normal       |   Milestone:  Awaiting Review
Component:  I18N         |     Version:  3.1
 Severity:  normal       |  Resolution:
 Keywords:               |
-------------------------+------------------------------

Comment (by Denis-de-Bernardy):

 Replying to [comment:6 dd32]:
 > Just like to mention here, That sorting is currently mainly done at the
 database level, and then a subselect is done with a LIMIT.

 Hehe. My point entirely, if any. ;-)

 That said, we *could* add some kind of sort_post column here or there to
 work around this.

 Here's a function I use in postgresql to deal with this in case there is
 any interest to replicate it using MySQL and a trigger:

 {{{
 CREATE OR REPLACE FUNCTION natsort(text)
         RETURNS text
 AS $$
 DECLARE
         _str    text := $1;
         _pad    int := 15; -- Maximum precision for PostgreSQL floats
 BEGIN
         -- Bail if the string is empty
         IF      trim(_str) = ''
         THEN
                 RETURN '';
         END IF;

         -- Strip accents and lower the case
         _str := lower(unaccent(_str));

         -- Replace nonsensical characters
         _str := regexp_replace(_str,
 E'[^a-z0-9$¢£¥₤€@&%\\(\\)\\[\\]\\{\\}_:;,\\.\\?!\\+\\-]+', ' ', 'g');

         -- Trim the result
         _str := trim(_str);

         -- Todo: we'd ideally want to strip leading articles/prepositions
 ('a', 'the') at this stage,
         -- but to_tsvector() also strips common words (e.g. 'all').

         -- We're done if the string contains no numbers
         IF      _str !~ '[0-9]'
         THEN
                 RETURN _str;
         END IF;

         -- Force spaces between numbers, so we can use
 regexp_split_to_table()
         _str := regexp_replace(_str,
 E'((?:[0-9]+|[0-9]*\\.[0-9]+)(?:e[+-]?[0-9]+\\M)?)', E' \\1 ', 'g');

         -- Pad zeros to obtain a reasonably natural looking sort order
         RETURN array_to_string(ARRAY (
         SELECT  CASE
                         WHEN val !~ E'^\\.?[0-9]'
                         THEN
                                 -- Not a number; return as is
                                 val
                         ELSE
                                 -- Do our best...
                                 COALESCE(lpad(substring(val::numeric::text
 from '^[0-9]+'), _pad, '0'), '') ||
                                 COALESCE(rpad(substring(val::numeric::text
 from E'\\.[0-9]+'), _pad, '0'), '')
                         END
         FROM    regexp_split_to_table(_str, E'\\s+') as val
         WHERE   val <> ''
         ), ' ');
 END;
 $$ IMMUTABLE STRICT LANGUAGE plpgsql COST 1;

 }}}

-- 
Ticket URL: <http://core.trac.wordpress.org/ticket/16530#comment:7>
WordPress Trac <http://core.trac.wordpress.org/>
WordPress blogging software


More information about the wp-trac mailing list