Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
D
django
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
django
Commits
edee5a8d
Kaydet (Commit)
edee5a8d
authored
Haz 01, 2017
tarafından
François Freitag
Kaydeden (commit)
Tim Graham
Haz 01, 2017
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Fixed #27639 -- Added chunk_size parameter to QuerySet.iterator().
üst
bf50ae82
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
85 additions
and
11 deletions
+85
-11
query.py
django/db/models/query.py
+8
-5
compiler.py
django/db/models/sql/compiler.py
+5
-5
querysets.txt
docs/ref/models/querysets.txt
+21
-1
2.0.txt
docs/releases/2.0.txt
+12
-0
test_iterator.py
tests/queries/test_iterator.py
+39
-0
No files found.
django/db/models/query.py
Dosyayı görüntüle @
edee5a8d
...
...
@@ -20,7 +20,7 @@ from django.db.models.expressions import F
from
django.db.models.fields
import
AutoField
from
django.db.models.functions
import
Trunc
from
django.db.models.query_utils
import
InvalidQuery
,
Q
from
django.db.models.sql.constants
import
CURSOR
from
django.db.models.sql.constants
import
CURSOR
,
GET_ITERATOR_CHUNK_SIZE
from
django.utils
import
timezone
from
django.utils.functional
import
cached_property
,
partition
from
django.utils.version
import
get_version
...
...
@@ -33,9 +33,10 @@ EmptyResultSet = sql.EmptyResultSet
class
BaseIterable
:
def
__init__
(
self
,
queryset
,
chunked_fetch
=
False
):
def
__init__
(
self
,
queryset
,
chunked_fetch
=
False
,
chunk_size
=
GET_ITERATOR_CHUNK_SIZE
):
self
.
queryset
=
queryset
self
.
chunked_fetch
=
chunked_fetch
self
.
chunk_size
=
chunk_size
class
ModelIterable
(
BaseIterable
):
...
...
@@ -47,7 +48,7 @@ class ModelIterable(BaseIterable):
compiler
=
queryset
.
query
.
get_compiler
(
using
=
db
)
# Execute the query. This will also fill compiler.select, klass_info,
# and annotations.
results
=
compiler
.
execute_sql
(
chunked_fetch
=
self
.
chunked_fetch
)
results
=
compiler
.
execute_sql
(
chunked_fetch
=
self
.
chunked_fetch
,
chunk_size
=
self
.
chunk_size
)
select
,
klass_info
,
annotation_col_map
=
(
compiler
.
select
,
compiler
.
klass_info
,
compiler
.
annotation_col_map
)
model_cls
=
klass_info
[
'model'
]
...
...
@@ -301,13 +302,15 @@ class QuerySet:
# METHODS THAT DO DATABASE QUERIES #
####################################
def
iterator
(
self
):
def
iterator
(
self
,
chunk_size
=
2000
):
"""
An iterator over the results from applying this QuerySet to the
database.
"""
if
chunk_size
<=
0
:
raise
ValueError
(
'Chunk size must be strictly positive.'
)
use_chunked_fetch
=
not
connections
[
self
.
db
]
.
settings_dict
.
get
(
'DISABLE_SERVER_SIDE_CURSORS'
)
return
iter
(
self
.
_iterable_class
(
self
,
chunked_fetch
=
use_chunked_fetch
))
return
iter
(
self
.
_iterable_class
(
self
,
chunked_fetch
=
use_chunked_fetch
,
chunk_size
=
chunk_size
))
def
aggregate
(
self
,
*
args
,
**
kwargs
):
"""
...
...
django/db/models/sql/compiler.py
Dosyayı görüntüle @
edee5a8d
...
...
@@ -883,7 +883,7 @@ class SQLCompiler:
self
.
query
.
set_extra_mask
([
'a'
])
return
bool
(
self
.
execute_sql
(
SINGLE
))
def
execute_sql
(
self
,
result_type
=
MULTI
,
chunked_fetch
=
False
):
def
execute_sql
(
self
,
result_type
=
MULTI
,
chunked_fetch
=
False
,
chunk_size
=
GET_ITERATOR_CHUNK_SIZE
):
"""
Run the query against the database and return the result(s). The
return value is a single data item if result_type is SINGLE, or an
...
...
@@ -937,7 +937,8 @@ class SQLCompiler:
result
=
cursor_iter
(
cursor
,
self
.
connection
.
features
.
empty_fetchmany_value
,
self
.
col_count
self
.
col_count
,
chunk_size
,
)
if
not
chunked_fetch
and
not
self
.
connection
.
features
.
can_use_chunked_reads
:
try
:
...
...
@@ -1298,14 +1299,13 @@ class SQLAggregateCompiler(SQLCompiler):
return
sql
,
params
def
cursor_iter
(
cursor
,
sentinel
,
col_count
):
def
cursor_iter
(
cursor
,
sentinel
,
col_count
,
itersize
):
"""
Yield blocks of rows from a cursor and ensure the cursor is closed when
done.
"""
try
:
for
rows
in
iter
((
lambda
:
cursor
.
fetchmany
(
GET_ITERATOR_CHUNK_SIZE
)),
sentinel
):
for
rows
in
iter
((
lambda
:
cursor
.
fetchmany
(
itersize
)),
sentinel
):
yield
[
r
[
0
:
col_count
]
for
r
in
rows
]
finally
:
cursor
.
close
()
docs/ref/models/querysets.txt
Dosyayı görüntüle @
edee5a8d
...
...
@@ -2004,7 +2004,7 @@ If you pass ``in_bulk()`` an empty list, you'll get an empty dictionary.
``iterator()``
~~~~~~~~~~~~~~
.. method:: iterator()
.. method:: iterator(
chunk_size=2000
)
Evaluates the ``QuerySet`` (by performing the query) and returns an iterator
(see :pep:`234`) over the results. A ``QuerySet`` typically caches its results
...
...
@@ -2033,6 +2033,11 @@ set into memory.
The Oracle database driver always uses server-side cursors.
With server-side cursors, the ``chunk_size`` parameter specifies the number of
results to cache at the database driver level. Fetching bigger chunks
diminishes the number of round trips between the database driver and the
database, at the expense of memory.
On PostgreSQL, server-side cursors will only be used when the
:setting:`DISABLE_SERVER_SIDE_CURSORS <DATABASE-DISABLE_SERVER_SIDE_CURSORS>`
setting is ``False``. Read :ref:`transaction-pooling-server-side-cursors` if
...
...
@@ -2048,10 +2053,25 @@ drivers load the entire result set into memory. The result set is then
transformed into Python row objects by the database adapter using the
``fetchmany()`` method defined in :pep:`249`.
The ``chunk_size`` parameter controls the size of batches Django retrieves from
the database driver. Larger batches decrease the overhead of communicating with
the database driver at the expense of a slight increase in memory consumption.
The default value of ``chunk_size``, 2000, comes from `a calculation on the
psycopg mailing list <https://www.postgresql.org/message-id/4D2F2C71.8080805%40dndg.it>`_:

    Assuming rows of 10-20 columns with a mix of textual and numeric data, 2000
    is going to fetch less than 100KB of data, which seems a good compromise
    between the number of rows transferred and the data discarded if the loop
    is exited early.
.. versionchanged:: 1.11

    PostgreSQL support for server-side cursors was added.

.. versionchanged:: 2.0

    The ``chunk_size`` parameter was added.
``latest()``
~~~~~~~~~~~~
...
...
docs/releases/2.0.txt
Dosyayı görüntüle @
edee5a8d
...
...
@@ -214,6 +214,11 @@ Models
.. _`identity columns`: https://docs.oracle.com/database/121/DRDAA/migr_tools_feat.htm#DRDAA109
* The new ``chunk_size`` parameter of :meth:`.QuerySet.iterator` controls the
number of rows fetched by the Python database client when streaming results
from the database. For databases that don't support server-side cursors, it
controls the number of results Django fetches from the database adapter.
Requests and Responses
~~~~~~~~~~~~~~~~~~~~~~
...
...
@@ -280,6 +285,13 @@ Database backend API
attribute with the name of the database that your backend works with. Django
may use it in various messages, such as in system checks.
* To improve performance when streaming large result sets from the database,
:meth:`.QuerySet.iterator` now fetches 2000 rows at a time instead of 100.
The old behavior can be restored using the ``chunk_size`` parameter. For
example::

    Book.objects.iterator(chunk_size=100)
Dropped support for Oracle 11.2
-------------------------------
...
...
tests/queries/test_iterator.py
0 → 100644
Dosyayı görüntüle @
edee5a8d
import
datetime
from
unittest
import
mock
from
django.db.models.sql.compiler
import
cursor_iter
from
django.test
import
TestCase
from
.models
import
Article
class QuerySetIteratorTests(TestCase):
    """Check how ``QuerySet.iterator()`` validates and forwards ``chunk_size``."""

    # Index of the ``itersize`` argument among the positional arguments that
    # ``cursor_iter(cursor, sentinel, col_count, itersize)`` receives.
    itersize_index_in_mock_args = 3

    @classmethod
    def setUpTestData(cls):
        """Create two articles so that the iterator has rows to fetch."""
        for title in ('Article 1', 'Article 2'):
            Article.objects.create(name=title, created=datetime.datetime.now())

    def _itersize_given_to_cursor_iter(self, iterator):
        """
        Pull one row from ``iterator`` while spying on ``cursor_iter`` and
        return the ``itersize`` positional argument it was called with.
        """
        spy = mock.patch(
            'django.db.models.sql.compiler.cursor_iter',
            # Delegate to the real function so the query still runs.
            side_effect=cursor_iter,
        )
        with spy as cursor_iter_mock:
            next(iterator)
        self.assertEqual(cursor_iter_mock.call_count, 1)
        positional, _keyword = cursor_iter_mock.call_args
        return positional[self.itersize_index_in_mock_args]

    def test_iterator_invalid_chunk_size(self):
        """Zero and negative chunk sizes raise ValueError."""
        message = 'Chunk size must be strictly positive.'
        for size in (0, -1):
            with self.subTest(size=size):
                with self.assertRaisesMessage(ValueError, message):
                    Article.objects.iterator(chunk_size=size)

    def test_default_iterator_chunk_size(self):
        """Without an explicit chunk_size, cursor_iter is called with 2000."""
        itersize = self._itersize_given_to_cursor_iter(Article.objects.iterator())
        self.assertEqual(itersize, 2000)

    def test_iterator_chunk_size(self):
        """An explicit chunk_size is passed through to cursor_iter unchanged."""
        batch_size = 3
        itersize = self._itersize_given_to_cursor_iter(
            Article.objects.iterator(chunk_size=batch_size)
        )
        self.assertEqual(itersize, batch_size)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment