Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
04248a8d
Kaydet (Commit)
04248a8d
authored
Eki 12, 2010
tarafından
Antoine Pitrou
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Issue #3873: Speed up unpickling from file objects which have a peek()
method.
üst
b0182c8c
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
138 additions
and
25 deletions
+138
-25
pickletester.py
Lib/test/pickletester.py
+42
-1
NEWS
Misc/NEWS
+3
-0
_pickle.c
Modules/_pickle.c
+93
-24
No files found.
Lib/test/pickletester.py
Dosyayı görüntüle @
04248a8d
...
...
@@ -30,6 +30,21 @@ def count_opcode(code, pickle):
n
+=
1
return
n
class
UnseekableIO
(
io
.
BytesIO
):
def
peek
(
self
,
*
args
):
raise
NotImplementedError
def
seekable
(
self
):
return
False
def
seek
(
self
,
*
args
):
raise
io
.
UnsupportedOperation
def
tell
(
self
):
raise
io
.
UnsupportedOperation
# We can't very well test the extension registry without putting known stuff
# in it, but we have to be careful to restore its original state. Code
# should do this:
...
...
@@ -1072,9 +1087,10 @@ class AbstractPickleTests(unittest.TestCase):
# Test the correctness of internal buffering routines when handling
# large data.
for
proto
in
protocols
:
data
=
(
1
,
b
'x'
*
(
256
*
1024
)
)
data
=
(
1
,
min
,
b
'xy'
*
(
30
*
1024
),
len
)
dumped
=
self
.
dumps
(
data
,
proto
)
loaded
=
self
.
loads
(
dumped
)
self
.
assertEqual
(
len
(
loaded
),
len
(
data
))
self
.
assertEqual
(
loaded
,
data
)
...
...
@@ -1373,6 +1389,31 @@ class AbstractPicklerUnpicklerObjectTests(unittest.TestCase):
f
.
seek
(
0
)
self
.
assertEqual
(
unpickler
.
load
(),
data2
)
def
_check_multiple_unpicklings
(
self
,
ioclass
):
for
proto
in
protocols
:
data1
=
[(
x
,
str
(
x
))
for
x
in
range
(
2000
)]
+
[
b
"abcde"
,
len
]
f
=
ioclass
()
pickler
=
self
.
pickler_class
(
f
,
protocol
=
proto
)
pickler
.
dump
(
data1
)
pickled
=
f
.
getvalue
()
N
=
5
f
=
ioclass
(
pickled
*
N
)
unpickler
=
self
.
unpickler_class
(
f
)
for
i
in
range
(
N
):
if
f
.
seekable
():
pos
=
f
.
tell
()
self
.
assertEqual
(
unpickler
.
load
(),
data1
)
if
f
.
seekable
():
self
.
assertEqual
(
f
.
tell
(),
pos
+
len
(
pickled
))
self
.
assertRaises
(
EOFError
,
unpickler
.
load
)
def
test_multiple_unpicklings_seekable
(
self
):
self
.
_check_multiple_unpicklings
(
io
.
BytesIO
)
def
test_multiple_unpicklings_unseekable
(
self
):
self
.
_check_multiple_unpicklings
(
UnseekableIO
)
if
__name__
==
"__main__"
:
# Print some stuff that can be used to rewrite DATA{0,1,2}
...
...
Misc/NEWS
Dosyayı görüntüle @
04248a8d
...
...
@@ -13,6 +13,9 @@ Core and Builtins
Library
-------
- Issue #3873: Speed up unpickling from file objects which have a peek()
method.
- Issue #10075: Add a session_stats() method to SSLContext objects.
- Issue #9948: Fixed problem of losing filename case information.
...
...
Modules/_pickle.c
Dosyayı görüntüle @
04248a8d
...
...
@@ -101,6 +101,9 @@ enum {
/* Maximum size of the write buffer of Pickler when pickling to a
stream. This is ignored for in-memory pickling. */
MAX_WRITE_BUF_SIZE
=
64
*
1024
,
/* Prefetch size when unpickling (disabled on unpeekable streams) */
PREFETCH
=
8192
*
16
,
};
/* Exception classes for pickle. These should override the ones defined in
...
...
@@ -355,8 +358,10 @@ typedef struct UnpicklerObject {
char
*
input_line
;
Py_ssize_t
input_len
;
Py_ssize_t
next_read_idx
;
Py_ssize_t
prefetched_idx
;
/* index of first prefetched byte */
PyObject
*
read
;
/* read() method of the input stream. */
PyObject
*
readline
;
/* readline() method of the input stream. */
PyObject
*
peek
;
/* peek() method of the input stream, or NULL */
char
*
encoding
;
/* Name of the encoding to be used for
decoding strings pickled using Python
...
...
@@ -859,9 +864,28 @@ _Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
self
->
input_buffer
=
self
->
buffer
.
buf
;
self
->
input_len
=
self
->
buffer
.
len
;
self
->
next_read_idx
=
0
;
self
->
prefetched_idx
=
self
->
input_len
;
return
self
->
input_len
;
}
static
int
_Unpickler_SkipConsumed
(
UnpicklerObject
*
self
)
{
Py_ssize_t
consumed
=
self
->
next_read_idx
-
self
->
prefetched_idx
;
if
(
consumed
>
0
)
{
PyObject
*
r
;
assert
(
self
->
peek
);
/* otherwise we did something wrong */
/* This makes an useless copy... */
r
=
PyObject_CallFunction
(
self
->
read
,
"n"
,
consumed
);
if
(
r
==
NULL
)
return
-
1
;
Py_DECREF
(
r
);
self
->
prefetched_idx
=
self
->
next_read_idx
;
}
return
0
;
}
static
const
Py_ssize_t
READ_WHOLE_LINE
=
-
1
;
/* If reading from a file, we need to only pull the bytes we need, since there
...
...
@@ -882,10 +906,12 @@ static Py_ssize_t
_Unpickler_ReadFromFile
(
UnpicklerObject
*
self
,
Py_ssize_t
n
)
{
PyObject
*
data
;
Py_ssize_t
read_size
;
Py_ssize_t
read_size
,
prefetched_size
=
0
;
assert
(
self
->
read
!=
NULL
);
assert
(
self
->
next_read_idx
==
0
);
if
(
_Unpickler_SkipConsumed
(
self
)
<
0
)
return
-
1
;
if
(
n
==
READ_WHOLE_LINE
)
data
=
PyObject_Call
(
self
->
readline
,
empty_tuple
,
NULL
);
...
...
@@ -895,13 +921,41 @@ _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
return
-
1
;
data
=
_Unpickler_FastCall
(
self
,
self
->
read
,
len
);
}
if
(
data
==
NULL
)
return
-
1
;
read_size
=
_Unpickler_SetStringInput
(
self
,
data
);
self
->
input_len
=
0
;
/* Prefetch some data without advancing the file pointer, if possible */
if
(
self
->
peek
)
{
PyObject
*
len
,
*
prefetched
;
len
=
PyLong_FromSsize_t
(
PREFETCH
);
if
(
len
==
NULL
)
{
Py_DECREF
(
data
);
return
-
1
;
}
prefetched
=
_Unpickler_FastCall
(
self
,
self
->
peek
,
len
);
if
(
prefetched
==
NULL
)
{
if
(
PyErr_ExceptionMatches
(
PyExc_NotImplementedError
))
{
/* peek() is probably not supported by the given file object */
PyErr_Clear
();
Py_CLEAR
(
self
->
peek
);
}
else
{
Py_DECREF
(
data
);
return
-
1
;
}
}
else
{
assert
(
PyBytes_Check
(
prefetched
));
prefetched_size
=
PyBytes_GET_SIZE
(
prefetched
);
PyBytes_ConcatAndDel
(
&
data
,
prefetched
);
if
(
data
==
NULL
)
return
-
1
;
}
}
read_size
=
_Unpickler_SetStringInput
(
self
,
data
)
-
prefetched_size
;
Py_DECREF
(
data
);
self
->
prefetched_idx
=
read_size
;
return
read_size
;
}
...
...
@@ -921,30 +975,31 @@ _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
static
Py_ssize_t
_Unpickler_Read
(
UnpicklerObject
*
self
,
char
**
s
,
Py_ssize_t
n
)
{
Py_ssize_t
num_read
;
if
(
n
==
0
)
{
*
s
=
NULL
;
return
0
;
}
/* This condition will always be true if self->read. */
if
(
self
->
next_read_idx
+
n
>
self
->
input_len
)
{
if
(
self
->
read
)
{
Py_ssize_t
num_read
;
assert
(
self
->
next_read_idx
==
self
->
input_len
);
num_read
=
_Unpickler_ReadFromFile
(
self
,
n
);
if
(
n
<
0
)
return
-
1
;
if
(
num_read
==
n
)
{
*
s
=
self
->
input_buffer
;
return
num_read
;
}
}
if
(
self
->
next_read_idx
+
n
<=
self
->
input_len
)
{
*
s
=
self
->
input_buffer
+
self
->
next_read_idx
;
self
->
next_read_idx
+=
n
;
return
n
;
}
if
(
!
self
->
read
)
{
PyErr_Format
(
PyExc_EOFError
,
"Ran out of input"
);
return
-
1
;
}
num_read
=
_Unpickler_ReadFromFile
(
self
,
n
)
;
if
(
num_read
<
0
)
return
-
1
;
if
(
num_read
<
n
)
{
PyErr_Format
(
PyExc_EOFError
,
"Ran out of input"
);
return
-
1
;
}
assert
(
self
->
read
==
NULL
);
*
s
=
self
->
input_buffer
+
self
->
next_read_idx
;
self
->
next_read_idx
+=
n
;
*
s
=
self
->
input_buffer
;
self
->
next_read_idx
=
n
;
return
n
;
}
...
...
@@ -972,9 +1027,7 @@ _Unpickler_Readline(UnpicklerObject *self, char **result)
{
Py_ssize_t
i
,
num_read
;
/* This loop will never be entered if self->read is not NULL. */
for
(
i
=
self
->
next_read_idx
;
i
<
self
->
input_len
;
i
++
)
{
assert
(
self
->
read
==
NULL
);
if
(
self
->
input_buffer
[
i
]
==
'\n'
)
{
char
*
line_start
=
self
->
input_buffer
+
self
->
next_read_idx
;
num_read
=
i
-
self
->
next_read_idx
+
1
;
...
...
@@ -983,11 +1036,11 @@ _Unpickler_Readline(UnpicklerObject *self, char **result)
}
}
if
(
self
->
read
)
{
assert
(
self
->
next_read_idx
==
self
->
input_len
);
num_read
=
_Unpickler_ReadFromFile
(
self
,
READ_WHOLE_LINE
);
if
(
num_read
<
0
)
return
-
1
;
*
result
=
self
->
input_buffer
;
self
->
next_read_idx
=
num_read
;
return
num_read
;
}
...
...
@@ -1106,8 +1159,10 @@ _Unpickler_New(void)
self
->
input_line
=
NULL
;
self
->
input_len
=
0
;
self
->
next_read_idx
=
0
;
self
->
prefetched_idx
=
0
;
self
->
read
=
NULL
;
self
->
readline
=
NULL
;
self
->
peek
=
NULL
;
self
->
encoding
=
NULL
;
self
->
errors
=
NULL
;
self
->
marks
=
NULL
;
...
...
@@ -1124,6 +1179,13 @@ _Unpickler_New(void)
static
int
_Unpickler_SetInputStream
(
UnpicklerObject
*
self
,
PyObject
*
file
)
{
self
->
peek
=
PyObject_GetAttrString
(
file
,
"peek"
);
if
(
self
->
peek
==
NULL
)
{
if
(
PyErr_ExceptionMatches
(
PyExc_AttributeError
))
PyErr_Clear
();
else
return
-
1
;
}
self
->
read
=
PyObject_GetAttrString
(
file
,
"read"
);
self
->
readline
=
PyObject_GetAttrString
(
file
,
"readline"
);
if
(
self
->
readline
==
NULL
||
self
->
read
==
NULL
)
{
...
...
@@ -1132,6 +1194,7 @@ _Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
"file must have 'read' and 'readline' attributes"
);
Py_CLEAR
(
self
->
read
);
Py_CLEAR
(
self
->
readline
);
Py_CLEAR
(
self
->
peek
);
return
-
1
;
}
return
0
;
...
...
@@ -5207,6 +5270,9 @@ load(UnpicklerObject *self)
break
;
/* and we are done! */
}
if
(
_Unpickler_SkipConsumed
(
self
)
<
0
)
return
NULL
;
/* XXX: It is not clear what this is actually for. */
if
((
err
=
PyErr_Occurred
()))
{
if
(
err
==
PyExc_EOFError
)
{
...
...
@@ -5356,6 +5422,7 @@ Unpickler_dealloc(UnpicklerObject *self)
PyObject_GC_UnTrack
((
PyObject
*
)
self
);
Py_XDECREF
(
self
->
readline
);
Py_XDECREF
(
self
->
read
);
Py_XDECREF
(
self
->
peek
);
Py_XDECREF
(
self
->
stack
);
Py_XDECREF
(
self
->
pers_func
);
Py_XDECREF
(
self
->
arg
);
...
...
@@ -5378,6 +5445,7 @@ Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
Py_VISIT
(
self
->
readline
);
Py_VISIT
(
self
->
read
);
Py_VISIT
(
self
->
peek
);
Py_VISIT
(
self
->
stack
);
Py_VISIT
(
self
->
pers_func
);
Py_VISIT
(
self
->
arg
);
...
...
@@ -5389,6 +5457,7 @@ Unpickler_clear(UnpicklerObject *self)
{
Py_CLEAR
(
self
->
readline
);
Py_CLEAR
(
self
->
read
);
Py_CLEAR
(
self
->
peek
);
Py_CLEAR
(
self
->
stack
);
Py_CLEAR
(
self
->
pers_func
);
Py_CLEAR
(
self
->
arg
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment