# ulrik@kaizer.se / code /
# natsort.py

# coding: UTF-8

import itertools
import locale

def natsplit(ustr):
    """
    Split @ustr into runs of normal characters
    and numbers.

    Each run of decimal digits is converted to an int; every other run
    is kept as text.  The result is a tuple of the runs in their
    original order (empty for an empty string).

    Only characters that int() can parse count as digits.  Digit-like
    characters such as superscripts stay inside the text runs instead
    of making int() raise ValueError.

    >>> natsplit(u"file1.txt")
    (u'file', 1, u'.txt')
    >>> natsplit(u"")
    ()
    """
    def _isdecimal(char):
        # isdecimal (not isdigit): isdigit is also true for characters
        # like superscript two, which int() refuses to convert.
        return char.isdecimal()

    return tuple(
        int(u"".join(run)) if numeric else u"".join(run)
        for numeric, run in itertools.groupby(ustr, _isdecimal)
    )

def keyfunc(ntupl):
    u"""
    Build a locale-aware sort key from the tuple @ntupl

    Items that can be encoded are encoded to UTF-8 and passed through
    locale.strxfrm; items without an encode method (such as ints) are
    kept unchanged.  The result is a tuple of the same length.

    >>> keyfunc((u'file', 1, u'.txt'))
    ('file', 1, '.txt')

    # No way to test this: (depends on locale)
    locale.setlocale(locale.LC_ALL, '')
    keyfunc((u'löv', 1, u'.txt'))
    ('l\xc3\xb6v', 1, '.txt')
    """
    def _transform(item):
        # NB! locale.strxfrm is broken for unicode, so
        # the text has to be encoded into UTF-8 first!
        try:
            return locale.strxfrm(item.encode("UTF-8"))
        except AttributeError:
            # e.g. no .encode(): not text, use the item as-is
            return item

    return tuple([_transform(item) for item in ntupl])


def sortcorpus(corpus):
    """
    Return the items of @corpus as a new list in natural order,
    collated by the current locale

    Each item is split with natsplit and turned into a sort key
    with keyfunc.

    (Calling setlocale is the caller's job)

    >>> sortcorpus(u'''
    ... f0.txt f10.txt f100.txt f105.txt f110.txt f15.txt f20.txt f25.txt
    ... f30.txt f35.txt f40.txt f45.txt f5.txt f50.txt f55.txt f60.txt f65.txt
    ... f70.txt f75.txt f80.txt f85.txt f90.txt f95.txt
    ... '''.split()) # doctest:  +NORMALIZE_WHITESPACE
    [u'f0.txt', u'f5.txt', u'f10.txt', u'f15.txt', u'f20.txt', u'f25.txt',
    u'f30.txt', u'f35.txt', u'f40.txt', u'f45.txt', u'f50.txt', u'f55.txt',
    u'f60.txt', u'f65.txt', u'f70.txt', u'f75.txt', u'f80.txt', u'f85.txt',
    u'f90.txt', u'f95.txt', u'f100.txt', u'f105.txt', u'f110.txt']
    """
    return sorted(corpus, key=lambda entry: keyfunc(natsplit(entry)))

if __name__ == '__main__':
    # Run as a script: execute the doctests embedded in this module.
    #
    # Hack to allow unicode in docstrings (not needed at the moment,
    # but kept).  Python 2 only: reload() is a builtin there, and
    # sys is reloaded before setdefaultencoding is called, so the
    # order of the next three statements must not change.
    import sys
    reload(sys)
    sys.setdefaultencoding("UTF-8")

    # Collect and run every doctest found in this module's docstrings.
    import doctest
    doctest.testmod()