Kaydet (Commit) f51c1f59 authored tarafından Loic Bistuer's avatar Loic Bistuer Kaydeden (commit) Anssi Kääriäinen

Fixed #17001 -- Custom querysets for prefetch_related.

This patch introduces the Prefetch object which allows customizing prefetch
operations.

This enables things like filtering prefetched relations, calling select_related
from a prefetched relation, or prefetching the same relation multiple times
with different querysets.

When a Prefetch instance specifies a to_attr argument, the result is stored
in a list rather than a QuerySet. This has the fortunate consequence of being
significantly faster. The performance improvement is due to the fact that we
save the costly creation of a QuerySet instance.

Thanks @akaariai for the original patch and @bmispelon and @timgraham
for the reviews.
üst b1b04df0
...@@ -76,7 +76,10 @@ class GenericForeignKey(six.with_metaclass(RenameGenericForeignKeyMethods)): ...@@ -76,7 +76,10 @@ class GenericForeignKey(six.with_metaclass(RenameGenericForeignKeyMethods)):
# This should never happen. I love comments like this, don't you? # This should never happen. I love comments like this, don't you?
raise Exception("Impossible arguments to GFK.get_content_type!") raise Exception("Impossible arguments to GFK.get_content_type!")
def get_prefetch_queryset(self, instances): def get_prefetch_queryset(self, instances, queryset=None):
if queryset is not None:
raise ValueError("Custom queryset can't be used for this lookup.")
# For efficiency, group the instances by content type and then do one # For efficiency, group the instances by content type and then do one
# query per model # query per model
fk_dict = defaultdict(set) fk_dict = defaultdict(set)
...@@ -348,17 +351,22 @@ def create_generic_related_manager(superclass): ...@@ -348,17 +351,22 @@ def create_generic_related_manager(superclass):
db = self._db or router.db_for_read(self.model, instance=self.instance) db = self._db or router.db_for_read(self.model, instance=self.instance)
return super(GenericRelatedObjectManager, self).get_queryset().using(db).filter(**self.core_filters) return super(GenericRelatedObjectManager, self).get_queryset().using(db).filter(**self.core_filters)
def get_prefetch_queryset(self, instances): def get_prefetch_queryset(self, instances, queryset=None):
db = self._db or router.db_for_read(self.model, instance=instances[0]) if queryset is None:
queryset = super(GenericRelatedObjectManager, self).get_queryset()
queryset._add_hints(instance=instances[0])
queryset = queryset.using(queryset._db or self._db)
query = { query = {
'%s__pk' % self.content_type_field_name: self.content_type.id, '%s__pk' % self.content_type_field_name: self.content_type.id,
'%s__in' % self.object_id_field_name: set(obj._get_pk_val() for obj in instances) '%s__in' % self.object_id_field_name: set(obj._get_pk_val() for obj in instances)
} }
qs = super(GenericRelatedObjectManager, self).get_queryset().using(db).filter(**query)
# We (possibly) need to convert object IDs to the type of the # We (possibly) need to convert object IDs to the type of the
# instances' PK in order to match up instances: # instances' PK in order to match up instances:
object_id_converter = instances[0]._meta.pk.to_python object_id_converter = instances[0]._meta.pk.to_python
return (qs, return (queryset.filter(**query),
lambda relobj: object_id_converter(getattr(relobj, self.object_id_field_name)), lambda relobj: object_id_converter(getattr(relobj, self.object_id_field_name)),
lambda obj: obj._get_pk_val(), lambda obj: obj._get_pk_val(),
False, False,
......
...@@ -4,7 +4,7 @@ from django.core.exceptions import ObjectDoesNotExist, ImproperlyConfigured # N ...@@ -4,7 +4,7 @@ from django.core.exceptions import ObjectDoesNotExist, ImproperlyConfigured # N
from django.db.models.loading import ( # NOQA from django.db.models.loading import ( # NOQA
get_apps, get_app_path, get_app_paths, get_app, get_models, get_model, get_apps, get_app_path, get_app_paths, get_app, get_models, get_model,
register_models, UnavailableApp) register_models, UnavailableApp)
from django.db.models.query import Q, QuerySet # NOQA from django.db.models.query import Q, QuerySet, Prefetch # NOQA
from django.db.models.expressions import F # NOQA from django.db.models.expressions import F # NOQA
from django.db.models.manager import Manager # NOQA from django.db.models.manager import Manager # NOQA
from django.db.models.base import Model # NOQA from django.db.models.base import Model # NOQA
......
...@@ -162,7 +162,10 @@ class SingleRelatedObjectDescriptor(six.with_metaclass(RenameRelatedObjectDescri ...@@ -162,7 +162,10 @@ class SingleRelatedObjectDescriptor(six.with_metaclass(RenameRelatedObjectDescri
def get_queryset(self, **hints): def get_queryset(self, **hints):
return self.related.model._base_manager.db_manager(hints=hints) return self.related.model._base_manager.db_manager(hints=hints)
def get_prefetch_queryset(self, instances): def get_prefetch_queryset(self, instances, queryset=None):
if queryset is not None:
raise ValueError("Custom queryset can't be used for this lookup.")
rel_obj_attr = attrgetter(self.related.field.attname) rel_obj_attr = attrgetter(self.related.field.attname)
instance_attr = lambda obj: obj._get_pk_val() instance_attr = lambda obj: obj._get_pk_val()
instances_dict = dict((instance_attr(inst), inst) for inst in instances) instances_dict = dict((instance_attr(inst), inst) for inst in instances)
...@@ -264,7 +267,10 @@ class ReverseSingleRelatedObjectDescriptor(six.with_metaclass(RenameRelatedObjec ...@@ -264,7 +267,10 @@ class ReverseSingleRelatedObjectDescriptor(six.with_metaclass(RenameRelatedObjec
else: else:
return QuerySet(self.field.rel.to, hints=hints) return QuerySet(self.field.rel.to, hints=hints)
def get_prefetch_queryset(self, instances): def get_prefetch_queryset(self, instances, queryset=None):
if queryset is not None:
raise ValueError("Custom queryset can't be used for this lookup.")
rel_obj_attr = self.field.get_foreign_related_value rel_obj_attr = self.field.get_foreign_related_value
instance_attr = self.field.get_local_related_value instance_attr = self.field.get_local_related_value
instances_dict = dict((instance_attr(inst), inst) for inst in instances) instances_dict = dict((instance_attr(inst), inst) for inst in instances)
...@@ -397,23 +403,26 @@ def create_foreign_related_manager(superclass, rel_field, rel_model): ...@@ -397,23 +403,26 @@ def create_foreign_related_manager(superclass, rel_field, rel_model):
qs._known_related_objects = {rel_field: {self.instance.pk: self.instance}} qs._known_related_objects = {rel_field: {self.instance.pk: self.instance}}
return qs return qs
def get_prefetch_queryset(self, instances): def get_prefetch_queryset(self, instances, queryset=None):
if queryset is None:
queryset = super(RelatedManager, self).get_queryset()
queryset._add_hints(instance=instances[0])
queryset = queryset.using(queryset._db or self._db)
rel_obj_attr = rel_field.get_local_related_value rel_obj_attr = rel_field.get_local_related_value
instance_attr = rel_field.get_foreign_related_value instance_attr = rel_field.get_foreign_related_value
instances_dict = dict((instance_attr(inst), inst) for inst in instances) instances_dict = dict((instance_attr(inst), inst) for inst in instances)
query = {'%s__in' % rel_field.name: instances} query = {'%s__in' % rel_field.name: instances}
qs = super(RelatedManager, self).get_queryset() queryset = queryset.filter(**query)
qs._add_hints(instance=instances[0])
if self._db:
qs = qs.using(self._db)
qs = qs.filter(**query)
# Since we just bypassed this class' get_queryset(), we must manage # Since we just bypassed this class' get_queryset(), we must manage
# the reverse relation manually. # the reverse relation manually.
for rel_obj in qs: for rel_obj in queryset:
instance = instances_dict[rel_obj_attr(rel_obj)] instance = instances_dict[rel_obj_attr(rel_obj)]
setattr(rel_obj, rel_field.name, instance) setattr(rel_obj, rel_field.name, instance)
cache_name = rel_field.related_query_name() cache_name = rel_field.related_query_name()
return qs, rel_obj_attr, instance_attr, False, cache_name return queryset, rel_obj_attr, instance_attr, False, cache_name
def add(self, *objs): def add(self, *objs):
objs = list(objs) objs = list(objs)
...@@ -563,15 +572,15 @@ def create_many_related_manager(superclass, rel): ...@@ -563,15 +572,15 @@ def create_many_related_manager(superclass, rel):
qs = qs.using(self._db) qs = qs.using(self._db)
return qs._next_is_sticky().filter(**self.core_filters) return qs._next_is_sticky().filter(**self.core_filters)
def get_prefetch_queryset(self, instances): def get_prefetch_queryset(self, instances, queryset=None):
instance = instances[0] if queryset is None:
db = self._db or router.db_for_read(instance.__class__, instance=instance) queryset = super(ManyRelatedManager, self).get_queryset()
queryset._add_hints(instance=instances[0])
queryset = queryset.using(queryset._db or self._db)
query = {'%s__in' % self.query_field_name: instances} query = {'%s__in' % self.query_field_name: instances}
qs = super(ManyRelatedManager, self).get_queryset() queryset = queryset._next_is_sticky().filter(**query)
qs._add_hints(instance=instance)
if self._db:
qs = qs.using(db)
qs = qs._next_is_sticky().filter(**query)
# M2M: need to annotate the query in order to get the primary model # M2M: need to annotate the query in order to get the primary model
# that the secondary model was actually related to. We know that # that the secondary model was actually related to. We know that
...@@ -582,12 +591,12 @@ def create_many_related_manager(superclass, rel): ...@@ -582,12 +591,12 @@ def create_many_related_manager(superclass, rel):
# dealing with PK values. # dealing with PK values.
fk = self.through._meta.get_field(self.source_field_name) fk = self.through._meta.get_field(self.source_field_name)
join_table = self.through._meta.db_table join_table = self.through._meta.db_table
connection = connections[db] connection = connections[queryset.db]
qn = connection.ops.quote_name qn = connection.ops.quote_name
qs = qs.extra(select=dict( queryset = queryset.extra(select=dict(
('_prefetch_related_val_%s' % f.attname, ('_prefetch_related_val_%s' % f.attname,
'%s.%s' % (qn(join_table), qn(f.column))) for f in fk.local_related_fields)) '%s.%s' % (qn(join_table), qn(f.column))) for f in fk.local_related_fields))
return (qs, return (queryset,
lambda result: tuple(getattr(result, '_prefetch_related_val_%s' % f.attname) for f in fk.local_related_fields), lambda result: tuple(getattr(result, '_prefetch_related_val_%s' % f.attname) for f in fk.local_related_fields),
lambda inst: tuple(getattr(inst, f.attname) for f in fk.foreign_related_fields), lambda inst: tuple(getattr(inst, f.attname) for f in fk.foreign_related_fields),
False, False,
......
This diff is collapsed.
...@@ -129,3 +129,32 @@ In general, ``Q() objects`` make it possible to define and reuse conditions. ...@@ -129,3 +129,32 @@ In general, ``Q() objects`` make it possible to define and reuse conditions.
This permits the :ref:`construction of complex database queries This permits the :ref:`construction of complex database queries
<complex-lookups-with-q>` using ``|`` (``OR``) and ``&`` (``AND``) operators; <complex-lookups-with-q>` using ``|`` (``OR``) and ``&`` (``AND``) operators;
in particular, it is not otherwise possible to use ``OR`` in ``QuerySets``. in particular, it is not otherwise possible to use ``OR`` in ``QuerySets``.
``Prefetch()`` objects
======================
.. versionadded:: 1.7
.. class:: Prefetch(lookup, queryset=None, to_attr=None)
The ``Prefetch()`` object can be used to control the operation of
:meth:`~django.db.models.query.QuerySet.prefetch_related()`.
The ``lookup`` argument describes the relations to follow and works the same
as the string-based lookups passed to
:meth:`~django.db.models.query.QuerySet.prefetch_related()`.
The ``queryset`` argument supplies a base ``QuerySet`` for the given lookup.
This is useful to further filter down the prefetch operation, or to call
:meth:`~django.db.models.query.QuerySet.select_related()` from the prefetched
relation, hence reducing the number of queries even further.
The ``to_attr`` argument sets the result of the prefetch operation to a custom
attribute.
.. note::
When using ``to_attr`` the prefetched result is stored in a list.
This can provide a significant speed improvement over traditional
``prefetch_related`` calls which store the cached result within a
``QuerySet`` instance.
...@@ -898,7 +898,7 @@ objects have already been fetched, and it will skip fetching them again. ...@@ -898,7 +898,7 @@ objects have already been fetched, and it will skip fetching them again.
Chaining ``prefetch_related`` calls will accumulate the lookups that are Chaining ``prefetch_related`` calls will accumulate the lookups that are
prefetched. To clear any ``prefetch_related`` behavior, pass ``None`` as a prefetched. To clear any ``prefetch_related`` behavior, pass ``None`` as a
parameter:: parameter:
>>> non_prefetched = qs.prefetch_related(None) >>> non_prefetched = qs.prefetch_related(None)
...@@ -925,6 +925,91 @@ profile for your use case! ...@@ -925,6 +925,91 @@ profile for your use case!
Note that if you use ``iterator()`` to run the query, ``prefetch_related()`` Note that if you use ``iterator()`` to run the query, ``prefetch_related()``
calls will be ignored since these two optimizations do not make sense together. calls will be ignored since these two optimizations do not make sense together.
.. versionadded:: 1.7
You can use the :class:`~django.db.models.Prefetch` object to further control
the prefetch operation.
In its simplest form, ``Prefetch`` is equivalent to the traditional string-based
lookups:
>>> Restaurant.objects.prefetch_related(Prefetch('pizzas__toppings'))
You can provide a custom queryset with the optional ``queryset`` argument.
This can be used to change the default ordering of the queryset:
>>> Restaurant.objects.prefetch_related(
... Prefetch('pizzas__toppings', queryset=Toppings.objects.order_by('name')))
Or to call :meth:`~django.db.models.query.QuerySet.select_related()` when
applicable to reduce the number of queries even further:
>>> Pizza.objects.prefetch_related(
... Prefetch('restaurants', queryset=Restaurant.objects.select_related('best_pizza')))
You can also assign the prefetched result to a custom attribute with the optional
``to_attr`` argument. The result will be stored directly in a list.
This allows prefetching the same relation multiple times with a different
``QuerySet``; for instance:
>>> vegetarian_pizzas = Pizza.objects.filter(vegetarian=True)
>>> Restaurant.objects.prefetch_related(
... Prefetch('pizzas', to_attr='menu'),
... Prefetch('pizzas', queryset=vegetarian_pizzas, to_attr='vegetarian_menu'))
Lookups created with custom ``to_attr`` can still be traversed as usual by other
lookups:
>>> vegetarian_pizzas = Pizza.objects.filter(vegetarian=True)
>>> Restaurant.objects.prefetch_related(
... Prefetch('pizzas', queryset=vegetarian_pizzas, to_attr='vegetarian_menu'),
... 'vegetarian_menu__toppings')
Using ``to_attr`` is recommended when filtering down the prefetch result as it is
less ambiguous than storing a filtered result in the related manager's cache:
>>> queryset = Pizza.objects.filter(vegetarian=True)
>>>
>>> # Recommended:
>>> restaurants = Restaurant.objects.prefetch_related(
... Prefetch('pizzas', to_attr='vegetarian_pizzas', queryset=queryset))
>>> vegetarian_pizzas = restaurants[0].vegetarian_pizzas
>>>
>>> # Not recommended:
>>> restaurants = Restaurant.objects.prefetch_related(
... Prefetch('pizzas', queryset=queryset))
>>> vegetarian_pizzas = restaurants[0].pizzas.all()
.. note::
The ordering of lookups matters.
Take the following examples:
>>> prefetch_related('pizzas__toppings', 'pizzas')
This works even though it's unordered because ``'pizzas__toppings'``
already contains all the needed information, therefore the second argument
``'pizzas'`` is actually redundant.
>>> prefetch_related('pizzas__toppings', Prefetch('pizzas', queryset=Pizza.objects.all()))
This will raise a ``ValueError`` because of the attempt to redefine the
queryset of a previously seen lookup. Note that an implicit queryset was
created to traverse ``'pizzas'`` as part of the ``'pizzas__toppings'``
lookup.
>>> prefetch_related('pizza_list__toppings', Prefetch('pizzas', to_attr='pizza_list'))
This will trigger an ``AttributeError`` because ``'pizza_list'`` doesn't exist yet
when ``'pizza_list__toppings'`` is being processed.
This consideration is not limited to the use of ``Prefetch`` objects. Some
advanced techniques may require that the lookups be performed in a
specific order to avoid creating extra queries; therefore it's recommended
to always carefully order ``prefetch_related`` arguments.
extra extra
~~~~~ ~~~~~
......
...@@ -98,6 +98,21 @@ Using a custom manager when traversing reverse relations ...@@ -98,6 +98,21 @@ Using a custom manager when traversing reverse relations
It is now possible to :ref:`specify a custom manager It is now possible to :ref:`specify a custom manager
<using-custom-reverse-manager>` when traversing a reverse relationship. <using-custom-reverse-manager>` when traversing a reverse relationship.
New ``Prefetch`` object for advanced ``prefetch_related`` operations.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The new :class:`~django.db.models.Prefetch` object allows customizing
prefetch operations.
You can specify the ``QuerySet`` used to traverse a given relation
or customize the storage location of prefetch results.
This enables things like filtering prefetched relations, calling
:meth:`~django.db.models.query.QuerySet.select_related()` from a prefetched
relation, or prefetching the same relation multiple times with different
querysets. See :meth:`~django.db.models.query.QuerySet.prefetch_related()`
for more details.
Admin shortcuts support time zones Admin shortcuts support time zones
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
......
...@@ -137,6 +137,9 @@ class TaggedItem(models.Model): ...@@ -137,6 +137,9 @@ class TaggedItem(models.Model):
def __str__(self): def __str__(self):
return self.tag return self.tag
class Meta:
ordering = ['id']
class Bookmark(models.Model): class Bookmark(models.Model):
url = models.URLField() url = models.URLField()
...@@ -146,6 +149,9 @@ class Bookmark(models.Model): ...@@ -146,6 +149,9 @@ class Bookmark(models.Model):
object_id_field='favorite_fkey', object_id_field='favorite_fkey',
related_name='favorite_bookmarks') related_name='favorite_bookmarks')
class Meta:
ordering = ['id']
class Comment(models.Model): class Comment(models.Model):
comment = models.TextField() comment = models.TextField()
...@@ -155,12 +161,16 @@ class Comment(models.Model): ...@@ -155,12 +161,16 @@ class Comment(models.Model):
object_pk = models.TextField() object_pk = models.TextField()
content_object = generic.GenericForeignKey(ct_field="content_type", fk_field="object_pk") content_object = generic.GenericForeignKey(ct_field="content_type", fk_field="object_pk")
class Meta:
ordering = ['id']
## Models for lookup ordering tests ## Models for lookup ordering tests
class House(models.Model): class House(models.Model):
address = models.CharField(max_length=255) address = models.CharField(max_length=255)
owner = models.ForeignKey('Person', null=True)
class Meta: class Meta:
ordering = ['id'] ordering = ['id']
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment