Kaydet (Commit) 662eea11 authored tarafından Luke Plant's avatar Luke Plant

Fixed #16937 - added `QuerySet.prefetch_related` to prefetch many related objects.

Many thanks to akaariai for lots of review and feedback, bug finding,
additional unit tests and performance testing.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@16930 bcc190cf-cafb-0310-a4f2-bffc1f526a37
üst d30fbf8b
......@@ -225,11 +225,7 @@ class ReverseGenericRelatedObjectsDescriptor(object):
content_type = content_type,
content_type_field_name = self.field.content_type_field_name,
object_id_field_name = self.field.object_id_field_name,
core_filters = {
'%s__pk' % self.field.content_type_field_name: content_type.id,
'%s__exact' % self.field.object_id_field_name: instance._get_pk_val(),
}
prefetch_cache_name = self.field.attname,
)
return manager
......@@ -250,12 +246,12 @@ def create_generic_related_manager(superclass):
"""
class GenericRelatedObjectManager(superclass):
def __init__(self, model=None, core_filters=None, instance=None, symmetrical=None,
def __init__(self, model=None, instance=None, symmetrical=None,
source_col_name=None, target_col_name=None, content_type=None,
content_type_field_name=None, object_id_field_name=None):
content_type_field_name=None, object_id_field_name=None,
prefetch_cache_name=None):
super(GenericRelatedObjectManager, self).__init__()
self.core_filters = core_filters
self.model = model
self.content_type = content_type
self.symmetrical = symmetrical
......@@ -264,11 +260,29 @@ def create_generic_related_manager(superclass):
self.target_col_name = target_col_name
self.content_type_field_name = content_type_field_name
self.object_id_field_name = object_id_field_name
self.prefetch_cache_name = prefetch_cache_name
self.pk_val = self.instance._get_pk_val()
self.core_filters = {
'%s__pk' % content_type_field_name: content_type.id,
'%s__exact' % object_id_field_name: instance._get_pk_val(),
}
def get_query_set(self):
db = self._db or router.db_for_read(self.model, instance=self.instance)
return super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**self.core_filters)
try:
return self.instance._prefetched_objects_cache[self.prefetch_cache_name]
except (AttributeError, KeyError):
db = self._db or router.db_for_read(self.model, instance=self.instance)
return super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**self.core_filters)
def get_prefetch_query_set(self, instances):
db = self._db or router.db_for_read(self.model)
query = {
'%s__pk' % self.content_type_field_name: self.content_type.id,
'%s__in' % self.object_id_field_name:
[obj._get_pk_val() for obj in instances]
}
qs = super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**query)
return (qs, self.object_id_field_name, 'pk')
def add(self, *objs):
for obj in objs:
......
......@@ -432,8 +432,22 @@ class ForeignRelatedObjectsDescriptor(object):
self.model = rel_model
def get_query_set(self):
db = self._db or router.db_for_read(self.model, instance=self.instance)
return super(RelatedManager, self).get_query_set().using(db).filter(**(self.core_filters))
try:
return self.instance._prefetched_objects_cache[rel_field.related_query_name()]
except (AttributeError, KeyError):
db = self._db or router.db_for_read(self.model, instance=self.instance)
return super(RelatedManager, self).get_query_set().using(db).filter(**self.core_filters)
def get_prefetch_query_set(self, instances):
"""
Return a queryset that does the bulk lookup needed
by prefetch_related functionality.
"""
db = self._db or router.db_for_read(self.model)
query = {'%s__%s__in' % (rel_field.name, attname):
[getattr(obj, attname) for obj in instances]}
qs = super(RelatedManager, self).get_query_set().using(db).filter(**query)
return (qs, rel_field.get_attname(), attname)
def add(self, *objs):
for obj in objs:
......@@ -482,25 +496,60 @@ def create_many_related_manager(superclass, rel):
"""Creates a manager that subclasses 'superclass' (which is a Manager)
and adds behavior for many-to-many related objects."""
class ManyRelatedManager(superclass):
def __init__(self, model=None, core_filters=None, instance=None, symmetrical=None,
def __init__(self, model=None, query_field_name=None, instance=None, symmetrical=None,
source_field_name=None, target_field_name=None, reverse=False,
through=None):
through=None, prefetch_cache_name=None):
super(ManyRelatedManager, self).__init__()
self.model = model
self.core_filters = core_filters
self.query_field_name = query_field_name
self.core_filters = {'%s__pk' % query_field_name: instance._get_pk_val()}
self.instance = instance
self.symmetrical = symmetrical
self.source_field_name = source_field_name
self.target_field_name = target_field_name
self.reverse = reverse
self.through = through
self.prefetch_cache_name = prefetch_cache_name
self._pk_val = self.instance.pk
if self._pk_val is None:
raise ValueError("%r instance needs to have a primary key value before a many-to-many relationship can be used." % instance.__class__.__name__)
def get_query_set(self):
db = self._db or router.db_for_read(self.instance.__class__, instance=self.instance)
return super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**(self.core_filters))
try:
return self.instance._prefetched_objects_cache[self.prefetch_cache_name]
except (AttributeError, KeyError):
db = self._db or router.db_for_read(self.instance.__class__, instance=self.instance)
return super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**self.core_filters)
def get_prefetch_query_set(self, instances):
"""
Returns a tuple:
(queryset of instances of self.model that are related to passed in instances
attr of returned instances needed for matching
attr of passed in instances needed for matching)
"""
from django.db import connections
db = self._db or router.db_for_read(self.model)
query = {'%s__pk__in' % self.query_field_name:
[obj._get_pk_val() for obj in instances]}
qs = super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**query)
# M2M: need to annotate the query in order to get the primary model
# that the secondary model was actually related to. We know that
# there will already be a join on the join table, so we can just add
# the select.
# For non-autocreated 'through' models, can't assume we are
# dealing with PK values.
fk = self.through._meta.get_field(self.source_field_name)
source_col = fk.column
join_table = self.through._meta.db_table
connection = connections[db]
qn = connection.ops.quote_name
qs = qs.extra(select={'_prefetch_related_val':
'%s.%s' % (qn(join_table), qn(source_col))})
select_attname = fk.rel.get_related_field().get_attname()
return (qs, '_prefetch_related_val', select_attname)
# If the ManyToMany relation has an intermediary model,
# the add and remove methods do not exist.
......@@ -683,7 +732,8 @@ class ManyRelatedObjectsDescriptor(object):
manager = self.related_manager_cls(
model=rel_model,
core_filters={'%s__pk' % self.related.field.name: instance._get_pk_val()},
query_field_name=self.related.field.name,
prefetch_cache_name=self.related.field.related_query_name(),
instance=instance,
symmetrical=False,
source_field_name=self.related.field.m2m_reverse_field_name(),
......@@ -739,7 +789,8 @@ class ReverseManyRelatedObjectsDescriptor(object):
manager = self.related_manager_cls(
model=self.field.rel.to,
core_filters={'%s__pk' % self.field.related_query_name(): instance._get_pk_val()},
query_field_name=self.field.related_query_name(),
prefetch_cache_name=self.field.name,
instance=instance,
symmetrical=self.field.rel.symmetrical,
source_field_name=self.field.m2m_field_name(),
......
......@@ -172,6 +172,9 @@ class Manager(object):
def select_related(self, *args, **kwargs):
return self.get_query_set().select_related(*args, **kwargs)
def prefetch_related(self, *args, **kwargs):
return self.get_query_set().prefetch_related(*args, **kwargs)
def values(self, *args, **kwargs):
return self.get_query_set().values(*args, **kwargs)
......
This diff is collapsed.
......@@ -571,8 +571,6 @@ can be useful in situations where you might want to pass in either a model
manager or a ``QuerySet`` and do further filtering on the result. After calling
``all()`` on either object, you'll definitely have a ``QuerySet`` to work with.
.. _select-related:
select_related
~~~~~~~~~~~~~~
......@@ -690,6 +688,107 @@ is defined. Instead of specifying the field name, use the :attr:`related_name
A :class:`~django.db.models.OneToOneField` is not traversed in the reverse
direction if you are performing a depth-based ``select_related()`` call.
prefetch_related
~~~~~~~~~~~~~~~~
.. method:: prefetch_related(*lookups)
.. versionadded:: 1.4
Returns a ``QuerySet`` that will automatically retrieve, in a single batch,
related many-to-many and many-to-one objects for each of the specified lookups.
This is similar to ``select_related`` for the 'many related objects' case, but
note that ``prefetch_related`` causes a separate query to be issued for each set
of related objects that you request, unlike ``select_related`` which modifies
the original query with joins in order to get the related objects. With
``prefetch_related``, the additional queries are done as soon as the QuerySet
begins to be evaluated.
For example, suppose you have these models::
class Topping(models.Model):
name = models.CharField(max_length=30)
class Pizza(models.Model):
name = models.CharField(max_length=50)
toppings = models.ManyToManyField(Topping)
def __unicode__(self):
return u"%s (%s)" % (self.name, u", ".join([topping.name
for topping in self.toppings.all()]))
and run this code::
>>> Pizza.objects.all()
[u"Hawaiian (ham, pineapple)", u"Seafood (prawns, smoked salmon)"...
The problem with this code is that it will run a query on the Toppings table for
**every** item in the Pizza ``QuerySet``. Using ``prefetch_related``, this can
be reduced to two:
>>> Pizza.objects.all().prefetch_related('toppings')
All the relevant toppings will be fetched in a single query, and used to make
``QuerySets`` that have a pre-filled cache of the relevant results. These
``QuerySets`` are then used in the ``self.toppings.all()`` calls.
Please note that use of ``prefetch_related`` will mean that the additional
queries run will **always** be executed - even if you never use the related
objects - and it always fully populates the result cache on the primary
``QuerySet`` (which can sometimes be avoided in other cases).
Also remember that, as always with QuerySets, any subsequent chained methods
will ignore previously cached results, and retrieve data using a fresh database
query. So, if you write the following:
>>> pizzas = Pizza.objects.prefetch_related('toppings')
>>> [list(pizza.toppings.filter(spicy=True)) for pizza in pizzas]
...then the fact that `pizza.toppings.all()` has been prefetched will not help
you - in fact it hurts performance, since you have done a database query that
you haven't used. So use this feature with caution!
The lookups that must be supplied to this method can be any attributes on the
model instances which represent related queries that return multiple
objects. This includes attributes representing the 'many' side of ``ForeignKey``
relationships, forward and reverse ``ManyToManyField`` attributes, and also any
``GenericRelations``.
You can also use the normal join syntax to do related fields of related
fields. Suppose we have an additional model to the example above::
class Restaurant(models.Model):
pizzas = models.ManyToMany(Pizza, related_name='restaurants')
best_pizza = models.ForeignKey(Pizza, related_name='championed_by')
The following are all legal:
>>> Restaurant.objects.prefetch_related('pizzas__toppings')
This will prefetch all pizzas belonging to restaurants, and all toppings
belonging to those pizzas. This will result in a total of 3 database queries -
one for the restaurants, one for the pizzas, and one for the toppings.
>>> Restaurant.objects.select_related('best_pizza').prefetch_related('best_pizza__toppings')
This will fetch the best pizza and all the toppings for the best pizza for each
restaurant. This will be done in 2 database queries - one for the restaurants
and 'best pizzas' combined (achieved through use of ``select_related``), and one
for the toppings.
Chaining ``prefetch_related`` calls will accumulate the fields that should have
this behavior applied. To clear any ``prefetch_related`` behavior, pass `None`
as a parameter::
>>> non_prefetched = qs.prefetch_related(None)
One difference when using ``prefetch_related`` is that, in some circumstances,
objects created by a query can be shared between the different objects that they
are related to i.e. a single Python model instance can appear at more than one
point in the tree of objects that are returned. Normally this behavior will not
be a problem, and will in fact save both memory and CPU time.
extra
~~~~~
......
......@@ -63,6 +63,19 @@ setup for test suites) has seen a performance benefit as a result.
See the :meth:`~django.db.models.query.QuerySet.bulk_create` docs for more
information.
``QuerySet.prefetch_related``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Analagous to :meth:`~django.db.models.query.QuerySet.select_related` but for
many-to-many relationships,
:meth:`~django.db.models.query.QuerySet.prefetch_related` has been added to
:class:`~django.db.models.query.QuerySet`. This method returns a new ``QuerySet``
that will prefetch in a single batch each of the specified related lookups as
soon as it begins to be evaluated (e.g. by iterating over it). This enables you
to fix many instances of a very common performance problem, in which your code
ends up doing O(n) database queries (or worse) if objects on your primary
``QuerySet`` each have many related objects that you also need.
HTML5
~~~~~
......
......@@ -141,10 +141,12 @@ retrieving it all in one query. This is particularly important if you have a
query that is executed in a loop, and could therefore end up doing many database
queries, when only one was needed. So:
Use ``QuerySet.select_related()``
---------------------------------
Use ``QuerySet.select_related()`` and ``prefetch_related()``
------------------------------------------------------------
Understand :ref:`QuerySet.select_related() <select-related>` thoroughly, and use it:
Understand :meth:`~django.db.models.query.QuerySet.select_related` and
:meth:`~django.db.models.query.QuerySet.prefetch_related` thoroughly, and use
them:
* in view code,
......
from django.contrib.contenttypes.models import ContentType
from django.contrib.contenttypes import generic
from django.db import models
## Basic tests
class Author(models.Model):
name = models.CharField(max_length=50, unique=True)
first_book = models.ForeignKey('Book', related_name='first_time_authors')
favorite_authors = models.ManyToManyField(
'self', through='FavoriteAuthors', symmetrical=False, related_name='favors_me')
def __unicode__(self):
return self.name
class Meta:
ordering = ['id']
class AuthorWithAge(Author):
author = models.OneToOneField(Author, parent_link=True)
age = models.IntegerField()
class FavoriteAuthors(models.Model):
author = models.ForeignKey(Author, to_field='name', related_name='i_like')
likes_author = models.ForeignKey(Author, to_field='name', related_name='likes_me')
class Meta:
ordering = ['id']
class AuthorAddress(models.Model):
author = models.ForeignKey(Author, to_field='name', related_name='addresses')
address = models.TextField()
class Meta:
ordering = ['id']
def __unicode__(self):
return self.address
class Book(models.Model):
title = models.CharField(max_length=255)
authors = models.ManyToManyField(Author, related_name='books')
def __unicode__(self):
return self.title
class Meta:
ordering = ['id']
class BookWithYear(Book):
book = models.OneToOneField(Book, parent_link=True)
published_year = models.IntegerField()
aged_authors = models.ManyToManyField(
AuthorWithAge, related_name='books_with_year')
class Reader(models.Model):
name = models.CharField(max_length=50)
books_read = models.ManyToManyField(Book, related_name='read_by')
def __unicode__(self):
return self.name
class Meta:
ordering = ['id']
## Models for default manager tests
class Qualification(models.Model):
name = models.CharField(max_length=10)
class Meta:
ordering = ['id']
class TeacherManager(models.Manager):
def get_query_set(self):
return super(TeacherManager, self).get_query_set().prefetch_related('qualifications')
class Teacher(models.Model):
name = models.CharField(max_length=50)
qualifications = models.ManyToManyField(Qualification)
objects = TeacherManager()
def __unicode__(self):
return "%s (%s)" % (self.name, ", ".join(q.name for q in self.qualifications.all()))
class Meta:
ordering = ['id']
class Department(models.Model):
name = models.CharField(max_length=50)
teachers = models.ManyToManyField(Teacher)
class Meta:
ordering = ['id']
## Generic relation tests
class TaggedItem(models.Model):
tag = models.SlugField()
content_type = models.ForeignKey(ContentType, related_name="taggeditem_set2")
object_id = models.PositiveIntegerField()
content_object = generic.GenericForeignKey('content_type', 'object_id')
def __unicode__(self):
return self.tag
class Bookmark(models.Model):
url = models.URLField()
tags = generic.GenericRelation(TaggedItem)
## Models for lookup ordering tests
class House(models.Model):
address = models.CharField(max_length=255)
class Meta:
ordering = ['id']
class Room(models.Model):
name = models.CharField(max_length=50)
house = models.ForeignKey(House, related_name='rooms')
class Meta:
ordering = ['id']
class Person(models.Model):
name = models.CharField(max_length=50)
houses = models.ManyToManyField(House, related_name='occupants')
@property
def primary_house(self):
# Assume business logic forces every person to have at least one house.
return sorted(self.houses.all(), key=lambda house: -house.rooms.count())[0]
class Meta:
ordering = ['id']
## Models for nullable FK tests
class Employee(models.Model):
name = models.CharField(max_length=50)
boss = models.ForeignKey('self', null=True,
related_name='serfs')
def __unicode__(self):
return self.name
class Meta:
ordering = ['id']
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment