Kaydet (Commit) 1962a96a authored tarafından Matthew Somerville's avatar Matthew Somerville Kaydeden (comit) Tim Graham

Fixed #24938 -- Added PostgreSQL trigram support.

üst d7334b40
......@@ -3,7 +3,7 @@ from django.db.backends.signals import connection_created
from django.db.models import CharField, TextField
from django.utils.translation import ugettext_lazy as _
from .lookups import SearchLookup, Unaccent
from .lookups import SearchLookup, TrigramSimilar, Unaccent
from .signals import register_hstore_handler
......@@ -17,3 +17,5 @@ class PostgresConfig(AppConfig):
TextField.register_lookup(Unaccent)
CharField.register_lookup(SearchLookup)
TextField.register_lookup(SearchLookup)
CharField.register_lookup(TrigramSimilar)
TextField.register_lookup(TrigramSimilar)
......@@ -60,3 +60,8 @@ class SearchLookup(SearchVectorExact):
self.lhs = SearchVector(self.lhs)
lhs, lhs_params = super(SearchLookup, self).process_lhs(qn, connection)
return lhs, lhs_params
class TrigramSimilar(PostgresSimpleLookup):
    """Lookup registered under the name ``trigram_similar``."""
    # The percent sign is doubled -- presumably so a literal ``%`` survives
    # %-style parameter interpolation of the SQL; confirm against
    # PostgresSimpleLookup.
    operator = '%%'
    lookup_name = 'trigram_similar'
......@@ -40,3 +40,9 @@ class UnaccentExtension(CreateExtension):
def __init__(self):
self.name = 'unaccent'
class TrigramExtension(CreateExtension):
    """Migration operation that installs the ``pg_trgm`` extension."""

    def __init__(self):
        # The extension name is fixed, so callers pass no arguments.
        self.name = 'pg_trgm'
......@@ -185,3 +185,19 @@ class SearchRank(Func):
SearchVectorField.register_lookup(SearchVectorExact)
class TrigramBase(Func):
    """
    Shared constructor for the two-argument trigram expressions.

    ``expression`` is forwarded unchanged. ``string`` may be either an
    expression or a plain value; plain values are wrapped in ``Value``.
    The output field is always a ``FloatField``.
    """

    def __init__(self, expression, string, **extra):
        # Anything exposing ``resolve_expression`` is treated as an
        # expression already and passed through as-is.
        is_expression = hasattr(string, 'resolve_expression')
        rhs = string if is_expression else Value(string)
        super(TrigramBase, self).__init__(
            expression, rhs, output_field=FloatField(), **extra
        )
class TrigramSimilarity(TrigramBase):
    """Trigram expression that uses the SQL function ``SIMILARITY``."""
    function = 'SIMILARITY'
class TrigramDistance(TrigramBase):
    """Trigram expression whose two arguments are joined by ``<->``."""
    # No function name is set; presumably the arguments are rendered joined
    # by the operator alone -- confirm against Func's SQL template.
    arg_joiner = ' <-> '
    function = ''
......@@ -2,6 +2,32 @@
PostgreSQL specific lookups
===========================
Trigram similarity
==================
.. fieldlookup:: trigram_similar
.. versionadded:: 1.10
The ``trigram_similar`` lookup allows you to perform trigram lookups,
measuring the number of trigrams (three consecutive characters) shared, using a
dedicated PostgreSQL extension. A trigram lookup is given an expression and
returns results that have a similarity measurement greater than the current
similarity threshold.
To use it, add ``'django.contrib.postgres'`` to your :setting:`INSTALLED_APPS`
and activate the `pg_trgm extension
<http://www.postgresql.org/docs/current/interactive/pgtrgm.html>`_ on
PostgreSQL. You can install the extension using the
:class:`~django.contrib.postgres.operations.TrigramExtension` migration
operation.
The ``trigram_similar`` lookup can be used on
:class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`::
>>> City.objects.filter(name__trigram_similar="Middlesborough")
[<City: Middlesbrough>]
``Unaccent``
============
......
......@@ -27,6 +27,16 @@ the ``django.contrib.postgres.operations`` module.
which will install the ``hstore`` extension and also immediately set up the
connection to interpret hstore data.
``TrigramExtension``
====================
.. class:: TrigramExtension()
.. versionadded:: 1.10
A subclass of :class:`~django.contrib.postgres.operations.CreateExtension`
that installs the ``pg_trgm`` extension.
``UnaccentExtension``
=====================
......
......@@ -189,3 +189,58 @@ if it were an annotated ``SearchVector``::
[<Entry: Cheese on Toast recipes>, <Entry: Pizza recipes>]
.. _PostgreSQL documentation: http://www.postgresql.org/docs/current/static/textsearch-features.html#TEXTSEARCH-UPDATE-TRIGGERS
Trigram similarity
==================
Another approach to searching is trigram similarity. A trigram is a group of
three consecutive characters. In addition to the :lookup:`trigram_similar`
lookup, you can use a couple of other expressions.
To use them, you need to activate the `pg_trgm extension
<http://www.postgresql.org/docs/current/interactive/pgtrgm.html>`_ on
PostgreSQL. You can install it using the
:class:`~django.contrib.postgres.operations.TrigramExtension` migration
operation.
``TrigramSimilarity``
---------------------
.. class:: TrigramSimilarity(expression, string, **extra)
.. versionadded:: 1.10
Accepts a field name or expression, and a string or expression. Returns the
trigram similarity between the two arguments.
Usage example::
>>> from django.contrib.postgres.search import TrigramSimilarity
>>> Author.objects.create(name='Katy Stevens')
>>> Author.objects.create(name='Stephen Keats')
>>> test = 'Katie Stephens'
>>> Author.objects.annotate(
... similarity=TrigramSimilarity('name', test),
... ).filter(similarity__gt=0.3).order_by('-similarity')
[<Author: Katy Stevens>, <Author: Stephen Keats>]
``TrigramDistance``
-------------------
.. class:: TrigramDistance(expression, string, **extra)
.. versionadded:: 1.10
Accepts a field name or expression, and a string or expression. Returns the
trigram distance between the two arguments.
Usage example::
>>> from django.contrib.postgres.search import TrigramDistance
>>> Author.objects.create(name='Katy Stevens')
>>> Author.objects.create(name='Stephen Keats')
>>> test = 'Katie Stephens'
>>> Author.objects.annotate(
... distance=TrigramDistance('name', test),
... ).filter(distance__lte=0.7).order_by('distance')
[<Author: Katy Stevens>, <Author: Stephen Keats>]
......@@ -33,6 +33,10 @@ search engine. You can search across multiple fields in your relational
database, combine the searches with other lookups, use different language
configurations and weightings, and rank the results by relevance.
It also now includes trigram support, using the :lookup:`trigram_similar`
lookup, and the :class:`~django.contrib.postgres.search.TrigramSimilarity` and
:class:`~django.contrib.postgres.search.TrigramDistance` expressions.
Minor features
--------------
......
......@@ -55,11 +55,12 @@ use :lookup:`unaccented comparison <unaccent>`::
This shows another issue, where we are matching against a different spelling of
the name. In this case we have an asymmetry though - a search for ``Helen``
will pick up ``Helena`` or ``Hélène``, but not the reverse. Another option
would be to use a trigram comparison, which compares sequences of letters.
would be to use a :lookup:`trigram_similar` comparison, which compares
sequences of letters.
For example::
>>> Author.objects.filter(name__unaccent__lower__trigram='Hélène')
>>> Author.objects.filter(name__unaccent__lower__trigram_similar='Hélène')
[<Author: Helen Mirren>, <Author: Hélène Joy>]
Now we have a different problem - the longer name of "Helena Bonham Carter"
......
......@@ -5,12 +5,13 @@ from django.db import migrations
try:
from django.contrib.postgres.operations import (
CreateExtension, HStoreExtension, UnaccentExtension,
CreateExtension, HStoreExtension, TrigramExtension, UnaccentExtension,
)
except ImportError:
from django.test import mock
CreateExtension = mock.Mock()
HStoreExtension = mock.Mock()
TrigramExtension = mock.Mock()
UnaccentExtension = mock.Mock()
......@@ -21,5 +22,6 @@ class Migration(migrations.Migration):
# dash in its name.
CreateExtension('uuid-ossp'),
HStoreExtension(),
TrigramExtension(),
UnaccentExtension(),
]
from django.contrib.postgres.search import TrigramDistance, TrigramSimilarity
from django.test import modify_settings
from . import PostgreSQLTestCase
from .models import CharFieldModel, TextFieldModel
@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
class TrigramTest(PostgreSQLTestCase):
    """
    Exercise the ``trigram_similar`` lookup and the trigram expressions
    against the model named by ``Model``.
    """
    Model = CharFieldModel

    @classmethod
    def setUpTestData(cls):
        """Create the three rows that every test below queries."""
        rows = [
            cls.Model(field=text)
            for text in ('Matthew', 'Cat sat on mat.', 'Dog sat on rug.')
        ]
        cls.Model.objects.bulk_create(rows)

    def test_trigram_search(self):
        """A misspelt search term matches the stored value."""
        matches = self.Model.objects.filter(field__trigram_similar='Mathew')
        self.assertQuerysetEqual(
            matches,
            ['Matthew'],
            transform=lambda obj: obj.field,
        )

    def test_trigram_similarity(self):
        """Rows are annotated with their similarity and sorted best first."""
        search = 'Bat sat on cat.'
        queryset = self.Model.objects.filter(field__trigram_similar=search)
        queryset = queryset.annotate(
            similarity=TrigramSimilarity('field', search),
        )
        queryset = queryset.order_by('-similarity')
        self.assertQuerysetEqual(
            queryset,
            [('Cat sat on mat.', 0.625), ('Dog sat on rug.', 0.333333)],
            transform=lambda obj: (obj.field, obj.similarity),
            ordered=True,
        )

    def test_trigram_similarity_alternate(self):
        """Rows are annotated with their distance and sorted closest first."""
        queryset = self.Model.objects.annotate(
            distance=TrigramDistance('field', 'Bat sat on cat.'),
        )
        queryset = queryset.filter(distance__lte=0.7).order_by('distance')
        self.assertQuerysetEqual(
            queryset,
            [('Cat sat on mat.', 0.375), ('Dog sat on rug.', 0.666667)],
            transform=lambda obj: (obj.field, obj.distance),
            ordered=True,
        )
class TrigramTextFieldTest(TrigramTest):
    """
    Re-run the inherited trigram tests against ``TextFieldModel``; TextField
    has the same behavior as CharField regarding trigram lookups.
    """
    Model = TextFieldModel
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment