Kaydet (Commit) 1962a96a authored tarafından Matthew Somerville's avatar Matthew Somerville Kaydeden (comit) Tim Graham

Fixed #24938 -- Added PostgreSQL trigram support.

üst d7334b40
...@@ -3,7 +3,7 @@ from django.db.backends.signals import connection_created ...@@ -3,7 +3,7 @@ from django.db.backends.signals import connection_created
from django.db.models import CharField, TextField from django.db.models import CharField, TextField
from django.utils.translation import ugettext_lazy as _ from django.utils.translation import ugettext_lazy as _
from .lookups import SearchLookup, Unaccent from .lookups import SearchLookup, TrigramSimilar, Unaccent
from .signals import register_hstore_handler from .signals import register_hstore_handler
...@@ -17,3 +17,5 @@ class PostgresConfig(AppConfig): ...@@ -17,3 +17,5 @@ class PostgresConfig(AppConfig):
TextField.register_lookup(Unaccent) TextField.register_lookup(Unaccent)
CharField.register_lookup(SearchLookup) CharField.register_lookup(SearchLookup)
TextField.register_lookup(SearchLookup) TextField.register_lookup(SearchLookup)
CharField.register_lookup(TrigramSimilar)
TextField.register_lookup(TrigramSimilar)
...@@ -60,3 +60,8 @@ class SearchLookup(SearchVectorExact): ...@@ -60,3 +60,8 @@ class SearchLookup(SearchVectorExact):
self.lhs = SearchVector(self.lhs) self.lhs = SearchVector(self.lhs)
lhs, lhs_params = super(SearchLookup, self).process_lhs(qn, connection) lhs, lhs_params = super(SearchLookup, self).process_lhs(qn, connection)
return lhs, lhs_params return lhs, lhs_params
class TrigramSimilar(PostgresSimpleLookup):
    """Lookup exposed to querysets as ``<field>__trigram_similar``."""

    # NOTE(review): the percent sign is doubled, presumably so that a single
    # literal ``%`` survives parameter interpolation of the final SQL --
    # confirm against how PostgresSimpleLookup uses ``operator``.
    operator = '%%'
    lookup_name = 'trigram_similar'
...@@ -40,3 +40,9 @@ class UnaccentExtension(CreateExtension): ...@@ -40,3 +40,9 @@ class UnaccentExtension(CreateExtension):
def __init__(self): def __init__(self):
self.name = 'unaccent' self.name = 'unaccent'
class TrigramExtension(CreateExtension):
    """``CreateExtension`` operation preconfigured for the ``pg_trgm`` extension."""

    def __init__(self):
        # Takes no arguments: the extension name is fixed.
        self.name = 'pg_trgm'
...@@ -185,3 +185,19 @@ class SearchRank(Func): ...@@ -185,3 +185,19 @@ class SearchRank(Func):
SearchVectorField.register_lookup(SearchVectorExact) SearchVectorField.register_lookup(SearchVectorExact)
class TrigramBase(Func):
    """
    Common base for the two-argument trigram expressions.

    ``expression`` is forwarded unchanged as the first argument. ``string``
    is forwarded as the second argument; it is wrapped in ``Value`` unless it
    already has a ``resolve_expression`` attribute. The output field is
    always a ``FloatField``.
    """

    def __init__(self, expression, string, **extra):
        already_expression = hasattr(string, 'resolve_expression')
        rhs = string if already_expression else Value(string)
        super(TrigramBase, self).__init__(
            expression,
            rhs,
            output_field=FloatField(),
            **extra
        )
class TrigramSimilarity(TrigramBase):
    """Trigram expression that uses the ``SIMILARITY`` SQL function."""

    function = 'SIMILARITY'
class TrigramDistance(TrigramBase):
    """Trigram expression built on the ``<->`` operator."""

    # NOTE(review): with an empty function name and ' <-> ' as the argument
    # joiner, this presumably renders as ``(lhs <-> rhs)`` rather than as a
    # function call -- confirm against Func's SQL template.
    arg_joiner = ' <-> '
    function = ''
...@@ -2,6 +2,32 @@ ...@@ -2,6 +2,32 @@
PostgreSQL specific lookups PostgreSQL specific lookups
=========================== ===========================
Trigram similarity
==================
.. fieldlookup:: trigram_similar
.. versionadded:: 1.10
The ``trigram_similar`` lookup allows you to perform trigram lookups,
measuring the number of trigrams (three consecutive characters) shared, using a
dedicated PostgreSQL extension. A trigram lookup is given an expression and
returns results that have a similarity measurement greater than the current
similarity threshold.
To use it, add ``'django.contrib.postgres'`` in your :setting:`INSTALLED_APPS`
and activate the `pg_trgm extension
<http://www.postgresql.org/docs/current/interactive/pgtrgm.html>`_ on
PostgreSQL. You can install the extension using the
:class:`~django.contrib.postgres.operations.TrigramExtension` migration
operation.
The ``trigram_similar`` lookup can be used on
:class:`~django.db.models.CharField` and :class:`~django.db.models.TextField`::
>>> City.objects.filter(name__trigram_similar="Middlesborough")
[<City: Middlesbrough>]
``Unaccent`` ``Unaccent``
============ ============
......
...@@ -27,6 +27,16 @@ the ``django.contrib.postgres.operations`` module. ...@@ -27,6 +27,16 @@ the ``django.contrib.postgres.operations`` module.
which will install the ``hstore`` extension and also immediately set up the which will install the ``hstore`` extension and also immediately set up the
connection to interpret hstore data. connection to interpret hstore data.
``TrigramExtension``
====================
.. class:: TrigramExtension()
.. versionadded:: 1.10
A subclass of :class:`~django.contrib.postgres.operations.CreateExtension`
that installs the ``pg_trgm`` extension.
``UnaccentExtension`` ``UnaccentExtension``
===================== =====================
......
...@@ -189,3 +189,58 @@ if it were an annotated ``SearchVector``:: ...@@ -189,3 +189,58 @@ if it were an annotated ``SearchVector``::
[<Entry: Cheese on Toast recipes>, <Entry: Pizza recipes>] [<Entry: Cheese on Toast recipes>, <Entry: Pizza recipes>]
.. _PostgreSQL documentation: http://www.postgresql.org/docs/current/static/textsearch-features.html#TEXTSEARCH-UPDATE-TRIGGERS .. _PostgreSQL documentation: http://www.postgresql.org/docs/current/static/textsearch-features.html#TEXTSEARCH-UPDATE-TRIGGERS
Trigram similarity
==================
Another approach to searching is trigram similarity. A trigram is a group of
three consecutive characters. In addition to the :lookup:`trigram_similar`
lookup, you can use a couple of other expressions.
To use them, you need to activate the `pg_trgm extension
<http://www.postgresql.org/docs/current/interactive/pgtrgm.html>`_ on
PostgreSQL. You can install it using the
:class:`~django.contrib.postgres.operations.TrigramExtension` migration
operation.
``TrigramSimilarity``
---------------------
.. class:: TrigramSimilarity(expression, string, **extra)
.. versionadded:: 1.10
Accepts a field name or expression, and a string or expression. Returns the
trigram similarity between the two arguments.
Usage example::
>>> from django.contrib.postgres.search import TrigramSimilarity
>>> Author.objects.create(name='Katy Stevens')
>>> Author.objects.create(name='Stephen Keats')
>>> test = 'Katie Stephens'
>>> Author.objects.annotate(
... similarity=TrigramSimilarity('name', test),
... ).filter(similarity__gt=0.3).order_by('-similarity')
[<Author: Katy Stevens>, <Author: Stephen Keats>]
``TrigramDistance``
-------------------
.. class:: TrigramDistance(expression, string, **extra)
.. versionadded:: 1.10
Accepts a field name or expression, and a string or expression. Returns the
trigram distance between the two arguments.
Usage example::
>>> from django.contrib.postgres.search import TrigramDistance
>>> Author.objects.create(name='Katy Stevens')
>>> Author.objects.create(name='Stephen Keats')
>>> test = 'Katie Stephens'
>>> Author.objects.annotate(
... distance=TrigramDistance('name', test),
... ).filter(distance__lte=0.7).order_by('distance')
[<Author: Katy Stevens>, <Author: Stephen Keats>]
...@@ -33,6 +33,10 @@ search engine. You can search across multiple fields in your relational ...@@ -33,6 +33,10 @@ search engine. You can search across multiple fields in your relational
database, combine the searches with other lookups, use different language database, combine the searches with other lookups, use different language
configurations and weightings, and rank the results by relevance. configurations and weightings, and rank the results by relevance.
It also now includes trigram support, using the :lookup:`trigram_similar`
lookup, and the :class:`~django.contrib.postgres.search.TrigramSimilarity` and
:class:`~django.contrib.postgres.search.TrigramDistance` expressions.
Minor features Minor features
-------------- --------------
......
...@@ -55,11 +55,12 @@ use :lookup:`unaccented comparison <unaccent>`:: ...@@ -55,11 +55,12 @@ use :lookup:`unaccented comparison <unaccent>`::
This shows another issue, where we are matching against a different spelling of This shows another issue, where we are matching against a different spelling of
the name. In this case we have an asymmetry though - a search for ``Helen`` the name. In this case we have an asymmetry though - a search for ``Helen``
will pick up ``Helena`` or ``Hélène``, but not the reverse. Another option will pick up ``Helena`` or ``Hélène``, but not the reverse. Another option
would be to use a trigram comparison, which compares sequences of letters. would be to use a :lookup:`trigram_similar` comparison, which compares
sequences of letters.
For example:: For example::
>>> Author.objects.filter(name__unaccent__lower__trigram='Hélène') >>> Author.objects.filter(name__unaccent__lower__trigram_similar='Hélène')
[<Author: Helen Mirren>, <Author: Hélène Joy>] [<Author: Helen Mirren>, <Author: Hélène Joy>]
Now we have a different problem - the longer name of "Helena Bonham Carter" Now we have a different problem - the longer name of "Helena Bonham Carter"
......
...@@ -5,12 +5,13 @@ from django.db import migrations ...@@ -5,12 +5,13 @@ from django.db import migrations
try: try:
from django.contrib.postgres.operations import ( from django.contrib.postgres.operations import (
CreateExtension, HStoreExtension, UnaccentExtension, CreateExtension, HStoreExtension, TrigramExtension, UnaccentExtension,
) )
except ImportError: except ImportError:
from django.test import mock from django.test import mock
CreateExtension = mock.Mock() CreateExtension = mock.Mock()
HStoreExtension = mock.Mock() HStoreExtension = mock.Mock()
TrigramExtension = mock.Mock()
UnaccentExtension = mock.Mock() UnaccentExtension = mock.Mock()
...@@ -21,5 +22,6 @@ class Migration(migrations.Migration): ...@@ -21,5 +22,6 @@ class Migration(migrations.Migration):
# dash in its name. # dash in its name.
CreateExtension('uuid-ossp'), CreateExtension('uuid-ossp'),
HStoreExtension(), HStoreExtension(),
TrigramExtension(),
UnaccentExtension(), UnaccentExtension(),
] ]
from django.contrib.postgres.search import TrigramDistance, TrigramSimilarity
from django.test import modify_settings
from . import PostgreSQLTestCase
from .models import CharFieldModel, TextFieldModel
@modify_settings(INSTALLED_APPS={'append': 'django.contrib.postgres'})
class TrigramTest(PostgreSQLTestCase):
    """
    Checks for the ``trigram_similar`` lookup and the ``TrigramSimilarity``
    and ``TrigramDistance`` expressions, run against ``Model``.
    """
    # Subclasses can point this at another model to rerun the same checks.
    Model = CharFieldModel

    @classmethod
    def setUpTestData(cls):
        """Create one row per sample string."""
        samples = ('Matthew', 'Cat sat on mat.', 'Dog sat on rug.')
        cls.Model.objects.bulk_create([cls.Model(field=text) for text in samples])

    def test_trigram_search(self):
        """A misspelt search term matches the correctly spelt row."""
        matches = self.Model.objects.filter(field__trigram_similar='Mathew')
        self.assertQuerysetEqual(
            matches,
            ['Matthew'],
            transform=lambda row: row.field,
        )

    def test_trigram_similarity(self):
        """Rows passing the lookup are annotated and ordered by similarity."""
        search = 'Bat sat on cat.'
        similar_rows = self.Model.objects.filter(field__trigram_similar=search)
        ranked = similar_rows.annotate(
            similarity=TrigramSimilarity('field', search),
        ).order_by('-similarity')
        expected = [('Cat sat on mat.', 0.625), ('Dog sat on rug.', 0.333333)]
        self.assertQuerysetEqual(
            ranked,
            expected,
            transform=lambda row: (row.field, row.similarity),
            ordered=True,
        )

    def test_trigram_similarity_alternate(self):
        """Rows are annotated with, filtered by and ordered by distance."""
        with_distance = self.Model.objects.annotate(
            distance=TrigramDistance('field', 'Bat sat on cat.'),
        )
        nearest_first = with_distance.filter(distance__lte=0.7).order_by('distance')
        expected = [('Cat sat on mat.', 0.375), ('Dog sat on rug.', 0.666667)]
        self.assertQuerysetEqual(
            nearest_first,
            expected,
            transform=lambda row: (row.field, row.distance),
            ordered=True,
        )
class TrigramTextFieldTest(TrigramTest):
    """
    Rerun the inherited trigram tests against a TextField model; the
    behavior is expected to match CharField.
    """
    Model = TextFieldModel
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment