Kaydet (Commit) 01b9c3d5 authored tarafından Anssi Kääriäinen's avatar Anssi Kääriäinen

Fixed #16715 -- Fixed join promotion logic for nested nullable FKs

The joins for nested nullable foreign keys were often created as INNER
when they should have been OUTER joins. The reason was that only the
first join in the chain was promoted correctly. There were also issues
with select_related etc.

The basic structure for this problem was:
  A -[nullable]-> B -[nonnull]-> C

And the basic problem was that the A->B join was correctly LOUTER,
the B->C join not.

The major change taken in this patch is that now if we promote a join
A->B, we will automatically promote joins B->X for all X in the query.
Also, we now make sure there aren't ever join chains like:
   a LOUTER b INNER c
If the a -> b needs to be LOUTER, then the INNER at the end of the
chain will cancel the LOUTER join and we have a broken query.

Sebastian reported this problem and did also major portions of the
patch.
üst d7a2e816
...@@ -470,9 +470,7 @@ class SQLCompiler(object): ...@@ -470,9 +470,7 @@ class SQLCompiler(object):
# Must use left outer joins for nullable fields and their relations. # Must use left outer joins for nullable fields and their relations.
# Ordering or distinct must not affect the returned set, and INNER # Ordering or distinct must not affect the returned set, and INNER
# JOINS for nullable fields could do this. # JOINS for nullable fields could do this.
if joins_to_promote: self.query.promote_joins(joins_to_promote)
self.query.promote_alias_chain(joins_to_promote,
self.query.alias_map[joins_to_promote[0]].join_type == self.query.LOUTER)
return field, col, alias, joins, opts return field, col, alias, joins, opts
def _final_join_removal(self, col, alias): def _final_join_removal(self, col, alias):
...@@ -645,8 +643,6 @@ class SQLCompiler(object): ...@@ -645,8 +643,6 @@ class SQLCompiler(object):
alias_chain.append(alias) alias_chain.append(alias)
for (dupe_opts, dupe_col) in dupe_set: for (dupe_opts, dupe_col) in dupe_set:
self.query.update_dupe_avoidance(dupe_opts, dupe_col, alias) self.query.update_dupe_avoidance(dupe_opts, dupe_col, alias)
if self.query.alias_map[root_alias].join_type == self.query.LOUTER:
self.query.promote_alias_chain(alias_chain, True)
else: else:
alias = root_alias alias = root_alias
...@@ -663,8 +659,6 @@ class SQLCompiler(object): ...@@ -663,8 +659,6 @@ class SQLCompiler(object):
columns, aliases = self.get_default_columns(start_alias=alias, columns, aliases = self.get_default_columns(start_alias=alias,
opts=f.rel.to._meta, as_pairs=True) opts=f.rel.to._meta, as_pairs=True)
self.query.related_select_cols.extend(columns) self.query.related_select_cols.extend(columns)
if self.query.alias_map[alias].join_type == self.query.LOUTER:
self.query.promote_alias_chain(aliases, True)
self.query.related_select_fields.extend(f.rel.to._meta.fields) self.query.related_select_fields.extend(f.rel.to._meta.fields)
if restricted: if restricted:
next = requested.get(f.name, {}) next = requested.get(f.name, {})
...@@ -738,7 +732,9 @@ class SQLCompiler(object): ...@@ -738,7 +732,9 @@ class SQLCompiler(object):
self.query.related_select_fields.extend(model._meta.fields) self.query.related_select_fields.extend(model._meta.fields)
next = requested.get(f.related_query_name(), {}) next = requested.get(f.related_query_name(), {})
new_nullable = f.null or None # Use True here because we are looking at the _reverse_ side of
# the relation, which is always nullable.
new_nullable = True
self.fill_related_selections(model._meta, table, cur_depth+1, self.fill_related_selections(model._meta, table, cur_depth+1,
used, next, restricted, new_nullable) used, next, restricted, new_nullable)
......
...@@ -505,7 +505,7 @@ class Query(object): ...@@ -505,7 +505,7 @@ class Query(object):
# Again, some of the tables won't have aliases due to # Again, some of the tables won't have aliases due to
# the trimming of unnecessary tables. # the trimming of unnecessary tables.
if self.alias_refcount.get(alias) or rhs.alias_refcount.get(alias): if self.alias_refcount.get(alias) or rhs.alias_refcount.get(alias):
self.promote_alias(alias, True) self.promote_joins([alias], True)
# Now relabel a copy of the rhs where-clause and add it to the current # Now relabel a copy of the rhs where-clause and add it to the current
# one. # one.
...@@ -682,32 +682,38 @@ class Query(object): ...@@ -682,32 +682,38 @@ class Query(object):
""" Decreases the reference count for this alias. """ """ Decreases the reference count for this alias. """
self.alias_refcount[alias] -= amount self.alias_refcount[alias] -= amount
def promote_alias(self, alias, unconditional=False): def promote_joins(self, aliases, unconditional=False):
""" """
Promotes the join type of an alias to an outer join if it's possible Promotes recursively the join type of given aliases and its children to
for the join to contain NULL values on the left. If 'unconditional' is an outer join. If 'unconditional' is False, the join is only promoted if
False, the join is only promoted if it is nullable, otherwise it is it is nullable or the parent join is an outer join.
always promoted.
Note about join promotion: When promoting any alias, we make sure all
Returns True if the join was promoted by this call. joins which start from that alias are promoted, too. When adding a join
""" in join(), we make sure any join added to already existing LOUTER join
if ((unconditional or self.alias_map[alias].nullable) and is generated as LOUTER. This ensures we don't ever have broken join
self.alias_map[alias].join_type != self.LOUTER): chains which contain first a LOUTER join, then an INNER JOIN, that is
data = self.alias_map[alias] this kind of join should never be generated: a LOUTER b INNER c. The
data = data._replace(join_type=self.LOUTER) reason for avoiding this type of join chain is that the INNER after
self.alias_map[alias] = data the LOUTER will effectively remove any effect the LOUTER had.
return True """
return False aliases = list(aliases)
while aliases:
def promote_alias_chain(self, chain, must_promote=False): alias = aliases.pop(0)
""" parent_alias = self.alias_map[alias].lhs_alias
Walks along a chain of aliases, promoting the first nullable join and parent_louter = (parent_alias
any joins following that. If 'must_promote' is True, all the aliases in and self.alias_map[parent_alias].join_type == self.LOUTER)
the chain are promoted. already_louter = self.alias_map[alias].join_type == self.LOUTER
""" if ((unconditional or self.alias_map[alias].nullable
for alias in chain: or parent_louter) and not already_louter):
if self.promote_alias(alias, must_promote): data = self.alias_map[alias]._replace(join_type=self.LOUTER)
must_promote = True self.alias_map[alias] = data
# Join type of 'alias' changed, so re-examine all aliases that
# refer to this one.
aliases.extend(
join for join in self.alias_map.keys()
if (self.alias_map[join].lhs_alias == alias
and join not in aliases))
def reset_refcounts(self, to_counts): def reset_refcounts(self, to_counts):
""" """
...@@ -726,19 +732,10 @@ class Query(object): ...@@ -726,19 +732,10 @@ class Query(object):
then and which ones haven't been used and promotes all of those then and which ones haven't been used and promotes all of those
aliases, plus any children of theirs in the alias tree, to outer joins. aliases, plus any children of theirs in the alias tree, to outer joins.
""" """
# FIXME: There's some (a lot of!) overlap with the similar OR promotion
# in add_filter(). It's not quite identical, but is very similar. So
# pulling out the common bits is something for later.
considered = {}
for alias in self.tables: for alias in self.tables:
if alias not in used_aliases: if alias in used_aliases and (alias not in initial_refcounts or
continue
if (alias not in initial_refcounts or
self.alias_refcount[alias] == initial_refcounts[alias]): self.alias_refcount[alias] == initial_refcounts[alias]):
parent = self.alias_map[alias].lhs_alias self.promote_joins([alias])
must_promote = considered.get(parent, False)
promoted = self.promote_alias(alias, must_promote)
considered[alias] = must_promote or promoted
def change_aliases(self, change_map): def change_aliases(self, change_map):
""" """
...@@ -875,6 +872,9 @@ class Query(object): ...@@ -875,6 +872,9 @@ class Query(object):
LOUTER join type. This is used when joining certain types of querysets LOUTER join type. This is used when joining certain types of querysets
and Q-objects together. and Q-objects together.
A join is always created as LOUTER if the lhs alias is LOUTER to make
sure we do not generate chains like a LOUTER b INNER c.
If 'nullable' is True, the join can potentially involve NULL values and If 'nullable' is True, the join can potentially involve NULL values and
is a candidate for promotion (to "left outer") when combining querysets. is a candidate for promotion (to "left outer") when combining querysets.
""" """
...@@ -900,8 +900,8 @@ class Query(object): ...@@ -900,8 +900,8 @@ class Query(object):
if self.alias_map[alias].lhs_alias != lhs: if self.alias_map[alias].lhs_alias != lhs:
continue continue
self.ref_alias(alias) self.ref_alias(alias)
if promote: if promote or (lhs and self.alias_map[lhs].join_type == self.LOUTER):
self.promote_alias(alias) self.promote_joins([alias])
return alias return alias
# No reuse is possible, so we need a new alias. # No reuse is possible, so we need a new alias.
...@@ -1009,8 +1009,7 @@ class Query(object): ...@@ -1009,8 +1009,7 @@ class Query(object):
# If the aggregate references a model or field that requires a join, # If the aggregate references a model or field that requires a join,
# those joins must be LEFT OUTER - empty join rows must be returned # those joins must be LEFT OUTER - empty join rows must be returned
# in order for zeros to be returned for those aggregates. # in order for zeros to be returned for those aggregates.
for column_alias in join_list: self.promote_joins(join_list, True)
self.promote_alias(column_alias, unconditional=True)
col = (join_list[-1], col) col = (join_list[-1], col)
else: else:
...@@ -1129,7 +1128,7 @@ class Query(object): ...@@ -1129,7 +1128,7 @@ class Query(object):
# If the comparison is against NULL, we may need to use some left # If the comparison is against NULL, we may need to use some left
# outer joins when creating the join chain. This is only done when # outer joins when creating the join chain. This is only done when
# needed, as it's less efficient at the database level. # needed, as it's less efficient at the database level.
self.promote_alias_chain(join_list) self.promote_joins(join_list)
join_promote = True join_promote = True
# Process the join list to see if we can remove any inner joins from # Process the join list to see if we can remove any inner joins from
...@@ -1160,16 +1159,16 @@ class Query(object): ...@@ -1160,16 +1159,16 @@ class Query(object):
# This means that we are dealing with two different query # This means that we are dealing with two different query
# subtrees, so we don't need to do any join promotion. # subtrees, so we don't need to do any join promotion.
continue continue
join_promote = join_promote or self.promote_alias(join, unconditional) join_promote = join_promote or self.promote_joins([join], unconditional)
if table != join: if table != join:
table_promote = self.promote_alias(table) table_promote = self.promote_joins([table])
# We only get here if we have found a table that exists # We only get here if we have found a table that exists
# in the join list, but isn't on the original tables list. # in the join list, but isn't on the original tables list.
# This means we've reached the point where we only have # This means we've reached the point where we only have
# new tables, so we can break out of this promotion loop. # new tables, so we can break out of this promotion loop.
break break
self.promote_alias_chain(join_it, join_promote) self.promote_joins(join_it, join_promote)
self.promote_alias_chain(table_it, table_promote or join_promote) self.promote_joins(table_it, table_promote or join_promote)
if having_clause or force_having: if having_clause or force_having:
if (alias, col) not in self.group_by: if (alias, col) not in self.group_by:
...@@ -1181,7 +1180,7 @@ class Query(object): ...@@ -1181,7 +1180,7 @@ class Query(object):
connector) connector)
if negate: if negate:
self.promote_alias_chain(join_list) self.promote_joins(join_list)
if lookup_type != 'isnull': if lookup_type != 'isnull':
if len(join_list) > 1: if len(join_list) > 1:
for alias in join_list: for alias in join_list:
...@@ -1655,7 +1654,7 @@ class Query(object): ...@@ -1655,7 +1654,7 @@ class Query(object):
final_alias = join.lhs_alias final_alias = join.lhs_alias
col = join.lhs_join_col col = join.lhs_join_col
joins = joins[:-1] joins = joins[:-1]
self.promote_alias_chain(joins[1:]) self.promote_joins(joins[1:])
self.select.append((final_alias, col)) self.select.append((final_alias, col))
self.select_fields.append(field) self.select_fields.append(field)
except MultiJoin: except MultiJoin:
......
from django.db import models
class Person(models.Model):
name = models.CharField(max_length=200)
class Movie(models.Model):
title = models.CharField(max_length=200)
director = models.ForeignKey(Person)
class Event(models.Model):
pass
class Screening(Event):
movie = models.ForeignKey(Movie)
class ScreeningNullFK(Event):
movie = models.ForeignKey(Movie, null=True)
class Package(models.Model):
screening = models.ForeignKey(Screening, null=True)
class PackageNullFK(models.Model):
screening = models.ForeignKey(ScreeningNullFK, null=True)
from __future__ import absolute_import
from django.test import TestCase
from .models import Person, Movie, Event, Screening, ScreeningNullFK, Package, PackageNullFK
# These are tests for #16715. The basic scheme is always the same: 3 models with
# 2 relations. The first relation may be null, while the second is non-nullable.
# In some cases, Django would pick the wrong join type for the second relation,
# resulting in missing objects in the queryset.
#
# Model A
# | (Relation A/B : nullable)
# Model B
# | (Relation B/C : non-nullable)
# Model C
#
# Because of the possibility of NULL rows resulting from the LEFT OUTER JOIN
# between Model A and Model B (i.e. instances of A without reference to B),
# the second join must also be LEFT OUTER JOIN, so that we do not ignore
# instances of A that do not reference B.
#
# Relation A/B can either be an explicit foreign key or an implicit reverse
# relation such as introduced by one-to-one relations (through multi-table
# inheritance).
class NestedForeignKeysTests(TestCase):
def setUp(self):
self.director = Person.objects.create(name='Terry Gilliam / Terry Jones')
self.movie = Movie.objects.create(title='Monty Python and the Holy Grail', director=self.director)
# This test failed in #16715 because in some cases INNER JOIN was selected
# for the second foreign key relation instead of LEFT OUTER JOIN.
def testInheritance(self):
some_event = Event.objects.create()
screening = Screening.objects.create(movie=self.movie)
self.assertEqual(len(Event.objects.all()), 2)
self.assertEqual(len(Event.objects.select_related('screening')), 2)
# This failed.
self.assertEqual(len(Event.objects.select_related('screening__movie')), 2)
self.assertEqual(len(Event.objects.values()), 2)
self.assertEqual(len(Event.objects.values('screening__pk')), 2)
self.assertEqual(len(Event.objects.values('screening__movie__pk')), 2)
self.assertEqual(len(Event.objects.values('screening__movie__title')), 2)
# This failed.
self.assertEqual(len(Event.objects.values('screening__movie__pk', 'screening__movie__title')), 2)
# Simple filter/exclude queries for good measure.
self.assertEqual(Event.objects.filter(screening__movie=self.movie).count(), 1)
self.assertEqual(Event.objects.exclude(screening__movie=self.movie).count(), 1)
# These all work because the second foreign key in the chain has null=True.
def testInheritanceNullFK(self):
some_event = Event.objects.create()
screening = ScreeningNullFK.objects.create(movie=None)
screening_with_movie = ScreeningNullFK.objects.create(movie=self.movie)
self.assertEqual(len(Event.objects.all()), 3)
self.assertEqual(len(Event.objects.select_related('screeningnullfk')), 3)
self.assertEqual(len(Event.objects.select_related('screeningnullfk__movie')), 3)
self.assertEqual(len(Event.objects.values()), 3)
self.assertEqual(len(Event.objects.values('screeningnullfk__pk')), 3)
self.assertEqual(len(Event.objects.values('screeningnullfk__movie__pk')), 3)
self.assertEqual(len(Event.objects.values('screeningnullfk__movie__title')), 3)
self.assertEqual(len(Event.objects.values('screeningnullfk__movie__pk', 'screeningnullfk__movie__title')), 3)
self.assertEqual(Event.objects.filter(screeningnullfk__movie=self.movie).count(), 1)
self.assertEqual(Event.objects.exclude(screeningnullfk__movie=self.movie).count(), 2)
# This test failed in #16715 because in some cases INNER JOIN was selected
# for the second foreign key relation instead of LEFT OUTER JOIN.
def testExplicitForeignKey(self):
package = Package.objects.create()
screening = Screening.objects.create(movie=self.movie)
package_with_screening = Package.objects.create(screening=screening)
self.assertEqual(len(Package.objects.all()), 2)
self.assertEqual(len(Package.objects.select_related('screening')), 2)
self.assertEqual(len(Package.objects.select_related('screening__movie')), 2)
self.assertEqual(len(Package.objects.values()), 2)
self.assertEqual(len(Package.objects.values('screening__pk')), 2)
self.assertEqual(len(Package.objects.values('screening__movie__pk')), 2)
self.assertEqual(len(Package.objects.values('screening__movie__title')), 2)
# This failed.
self.assertEqual(len(Package.objects.values('screening__movie__pk', 'screening__movie__title')), 2)
self.assertEqual(Package.objects.filter(screening__movie=self.movie).count(), 1)
self.assertEqual(Package.objects.exclude(screening__movie=self.movie).count(), 1)
# These all work because the second foreign key in the chain has null=True.
def testExplicitForeignKeyNullFK(self):
package = PackageNullFK.objects.create()
screening = ScreeningNullFK.objects.create(movie=None)
screening_with_movie = ScreeningNullFK.objects.create(movie=self.movie)
package_with_screening = PackageNullFK.objects.create(screening=screening)
package_with_screening_with_movie = PackageNullFK.objects.create(screening=screening_with_movie)
self.assertEqual(len(PackageNullFK.objects.all()), 3)
self.assertEqual(len(PackageNullFK.objects.select_related('screening')), 3)
self.assertEqual(len(PackageNullFK.objects.select_related('screening__movie')), 3)
self.assertEqual(len(PackageNullFK.objects.values()), 3)
self.assertEqual(len(PackageNullFK.objects.values('screening__pk')), 3)
self.assertEqual(len(PackageNullFK.objects.values('screening__movie__pk')), 3)
self.assertEqual(len(PackageNullFK.objects.values('screening__movie__title')), 3)
self.assertEqual(len(PackageNullFK.objects.values('screening__movie__pk', 'screening__movie__title')), 3)
self.assertEqual(PackageNullFK.objects.filter(screening__movie=self.movie).count(), 1)
self.assertEqual(PackageNullFK.objects.exclude(screening__movie=self.movie).count(), 2)
# Some additional tests for #16715. The only difference is the depth of the
# nesting as we now use 4 models instead of 3 (and thus 3 relations). This
# checks if promotion of join types works for deeper nesting too.
class DeeplyNestedForeignKeysTests(TestCase):
def setUp(self):
self.director = Person.objects.create(name='Terry Gilliam / Terry Jones')
self.movie = Movie.objects.create(title='Monty Python and the Holy Grail', director=self.director)
def testInheritance(self):
some_event = Event.objects.create()
screening = Screening.objects.create(movie=self.movie)
self.assertEqual(len(Event.objects.all()), 2)
self.assertEqual(len(Event.objects.select_related('screening__movie__director')), 2)
self.assertEqual(len(Event.objects.values()), 2)
self.assertEqual(len(Event.objects.values('screening__movie__director__pk')), 2)
self.assertEqual(len(Event.objects.values('screening__movie__director__name')), 2)
self.assertEqual(len(Event.objects.values('screening__movie__director__pk', 'screening__movie__director__name')), 2)
self.assertEqual(len(Event.objects.values('screening__movie__pk', 'screening__movie__director__pk')), 2)
self.assertEqual(len(Event.objects.values('screening__movie__pk', 'screening__movie__director__name')), 2)
self.assertEqual(len(Event.objects.values('screening__movie__title', 'screening__movie__director__pk')), 2)
self.assertEqual(len(Event.objects.values('screening__movie__title', 'screening__movie__director__name')), 2)
self.assertEqual(Event.objects.filter(screening__movie__director=self.director).count(), 1)
self.assertEqual(Event.objects.exclude(screening__movie__director=self.director).count(), 1)
def testExplicitForeignKey(self):
package = Package.objects.create()
screening = Screening.objects.create(movie=self.movie)
package_with_screening = Package.objects.create(screening=screening)
self.assertEqual(len(Package.objects.all()), 2)
self.assertEqual(len(Package.objects.select_related('screening__movie__director')), 2)
self.assertEqual(len(Package.objects.values()), 2)
self.assertEqual(len(Package.objects.values('screening__movie__director__pk')), 2)
self.assertEqual(len(Package.objects.values('screening__movie__director__name')), 2)
self.assertEqual(len(Package.objects.values('screening__movie__director__pk', 'screening__movie__director__name')), 2)
self.assertEqual(len(Package.objects.values('screening__movie__pk', 'screening__movie__director__pk')), 2)
self.assertEqual(len(Package.objects.values('screening__movie__pk', 'screening__movie__director__name')), 2)
self.assertEqual(len(Package.objects.values('screening__movie__title', 'screening__movie__director__pk')), 2)
self.assertEqual(len(Package.objects.values('screening__movie__title', 'screening__movie__director__name')), 2)
self.assertEqual(Package.objects.filter(screening__movie__director=self.director).count(), 1)
self.assertEqual(Package.objects.exclude(screening__movie__director=self.director).count(), 1)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment