Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
af14b79c
Kaydet (Commit)
af14b79c
authored
Agu 07, 2008
tarafından
Antoine Pitrou
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
issue #3460: PyUnicode_Join() implementation can be simplified in py3k
üst
4cc0f248
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
58 additions
and
90 deletions
+58
-90
NEWS
Misc/NEWS
+4
-0
unicodeobject.c
Objects/unicodeobject.c
+54
-90
No files found.
Misc/NEWS
Dosyayı görüntüle @
af14b79c
...
...
@@ -22,6 +22,10 @@ Core and Builtins
If you need to access the UTF-8 representation of a Unicode object
as bytes string, please use PyUnicode_AsUTF8String() instead.
- Issue #3460: The PyUnicode_Join() implementation is 10% to 80% faster thanks
to Python 3.0's stricter semantics, which make it possible to avoid successive
reallocations of the result string (this also affects str.join()).
Library
-------
...
...
Objects/unicodeobject.c
Dosyayı görüntüle @
af14b79c
...
...
@@ -5619,78 +5619,70 @@ int fixtitle(PyUnicodeObject *self)
PyObject
*
PyUnicode_Join
(
PyObject
*
separator
,
PyObject
*
seq
)
{
PyObject
*
internal_separator
=
NULL
;
const
Py_UNICODE
blank
=
' '
;
const
Py_UNICODE
*
sep
=
&
blank
;
Py_ssize_t
seplen
=
1
;
PyUnicodeObject
*
res
=
NULL
;
/* the result */
Py_ssize_t
res_alloc
=
100
;
/* # allocated bytes for string in res */
Py_ssize_t
res_used
;
/* # used bytes */
Py_UNICODE
*
res_p
;
/* pointer to free byte in res's string area */
PyObject
*
fseq
;
/* PySequence_Fast(seq) */
Py_ssize_t
seqlen
;
/* len(fseq) -- number of items in sequence */
Py_ssize_t
seqlen
;
/* len(fseq) -- number of items in sequence */
PyObject
**
items
;
PyObject
*
item
;
Py_ssize_t
i
;
Py_ssize_t
sz
,
i
;
fseq
=
PySequence_Fast
(
seq
,
""
);
if
(
fseq
==
NULL
)
{
return
NULL
;
}
/* Grrrr. A codec may be invoked to convert str objects to
* Unicode, and so it's possible to call back into Python code
* during PyUnicode_FromObject(), and so it's possible for a sick
* codec to change the size of fseq (if seq is a list). Therefore
* we have to keep refetching the size -- can't assume seqlen
* is invariant.
/* NOTE: the following code can't call back into Python code,
* so we are sure that fseq won't be mutated.
*/
seqlen
=
PySequence_Fast_GET_SIZE
(
fseq
);
/* If empty sequence, return u"". */
if
(
seqlen
==
0
)
{
res
=
_PyUnicode_New
(
0
);
/* empty sequence; return u"" */
goto
Done
;
}
items
=
PySequence_Fast_ITEMS
(
fseq
);
/* If singleton sequence with an exact Unicode, return that. */
if
(
seqlen
==
1
)
{
item
=
PySequence_Fast_GET_ITEM
(
fseq
,
0
)
;
item
=
items
[
0
]
;
if
(
PyUnicode_CheckExact
(
item
))
{
Py_INCREF
(
item
);
res
=
(
PyUnicodeObject
*
)
item
;
goto
Done
;
}
}
/* At least two items to join, or one that isn't exact Unicode. */
if
(
seqlen
>
1
)
{
/* Set up sep and seplen -- they're needed. */
if
(
separator
==
NULL
)
{
sep
=
&
blank
;
seplen
=
1
;
else
{
/* Set up sep and seplen */
if
(
separator
==
NULL
)
{
sep
=
&
blank
;
seplen
=
1
;
}
else
{
internal_separator
=
PyUnicode_FromObject
(
separator
);
if
(
internal_separator
==
NULL
)
goto
onError
;
sep
=
PyUnicode_AS_UNICODE
(
internal_separator
);
seplen
=
PyUnicode_GET_SIZE
(
internal_separator
);
/* In case PyUnicode_FromObject() mutated seq. */
seqlen
=
PySequence_Fast_GET_SIZE
(
fseq
);
else
{
if
(
!
PyUnicode_Check
(
separator
))
{
PyErr_Format
(
PyExc_TypeError
,
"separator: expected str instance,"
" %.80s found"
,
Py_TYPE
(
separator
)
->
tp_name
);
goto
onError
;
}
sep
=
PyUnicode_AS_UNICODE
(
separator
);
seplen
=
PyUnicode_GET_SIZE
(
separator
);
}
}
/* Get space. */
res
=
_PyUnicode_New
(
res_alloc
);
if
(
res
==
NULL
)
goto
onError
;
res_p
=
PyUnicode_AS_UNICODE
(
res
);
res_used
=
0
;
for
(
i
=
0
;
i
<
seqlen
;
++
i
)
{
Py_ssize_t
itemlen
;
Py_ssize_t
new_res_used
;
item
=
PySequence_Fast_GET_ITEM
(
fseq
,
i
);
/* Convert item to Unicode. */
/* There are at least two things to join, or else we have a subclass
* of str in the sequence.
* Do a pre-pass to figure out the total amount of space we'll
* need (sz), and see whether all arguments are strings.
*/
sz
=
0
;
for
(
i
=
0
;
i
<
seqlen
;
i
++
)
{
const
Py_ssize_t
old_sz
=
sz
;
item
=
items
[
i
];
if
(
!
PyUnicode_Check
(
item
))
{
PyErr_Format
(
PyExc_TypeError
,
"sequence item %zd: expected str instance,"
...
...
@@ -5698,68 +5690,40 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
i
,
Py_TYPE
(
item
)
->
tp_name
);
goto
onError
;
}
item
=
PyUnicode_FromObject
(
item
);
if
(
item
==
NULL
)
goto
onError
;
/* We own a reference to item from here on. */
/* In case PyUnicode_FromObject() mutated seq. */
seqlen
=
PySequence_Fast_GET_SIZE
(
fseq
);
/* Make sure we have enough space for the separator and the item. */
itemlen
=
PyUnicode_GET_SIZE
(
item
);
new_res_used
=
res_used
+
itemlen
;
if
(
new_res_used
<
0
)
goto
Overflow
;
if
(
i
<
seqlen
-
1
)
{
new_res_used
+=
seplen
;
if
(
new_res_used
<
0
)
goto
Overflow
;
}
if
(
new_res_used
>
res_alloc
)
{
/* double allocated size until it's big enough */
do
{
res_alloc
+=
res_alloc
;
if
(
res_alloc
<=
0
)
goto
Overflow
;
}
while
(
new_res_used
>
res_alloc
);
if
(
_PyUnicode_Resize
(
&
res
,
res_alloc
)
<
0
)
{
Py_DECREF
(
item
);
goto
onError
;
}
res_p
=
PyUnicode_AS_UNICODE
(
res
)
+
res_used
;
}
sz
+=
PyUnicode_GET_SIZE
(
item
);
if
(
i
!=
0
)
sz
+=
seplen
;
if
(
sz
<
old_sz
||
sz
>
PY_SSIZE_T_MAX
)
{
PyErr_SetString
(
PyExc_OverflowError
,
"join() result is too long for a Python string"
);
goto
onError
;
}
}
res
=
_PyUnicode_New
(
sz
);
if
(
res
==
NULL
)
goto
onError
;
/* Catenate everything. */
res_p
=
PyUnicode_AS_UNICODE
(
res
);
for
(
i
=
0
;
i
<
seqlen
;
++
i
)
{
Py_ssize_t
itemlen
;
item
=
items
[
i
];
itemlen
=
PyUnicode_GET_SIZE
(
item
);
/* Copy item, and maybe the separator. */
Py_UNICODE_COPY
(
res_p
,
PyUnicode_AS_UNICODE
(
item
),
itemlen
);
res_p
+=
itemlen
;
if
(
i
<
seqlen
-
1
)
{
if
(
i
)
{
Py_UNICODE_COPY
(
res_p
,
sep
,
seplen
);
res_p
+=
seplen
;
}
Py_
DECREF
(
item
);
res_
used
=
new_res_used
;
Py_
UNICODE_COPY
(
res_p
,
PyUnicode_AS_UNICODE
(
item
),
itemlen
);
res_
p
+=
itemlen
;
}
/* Shrink res to match the used area; this probably can't fail,
* but it's cheap to check.
*/
if
(
_PyUnicode_Resize
(
&
res
,
res_used
)
<
0
)
goto
onError
;
Done:
Py_XDECREF
(
internal_separator
);
Py_DECREF
(
fseq
);
return
(
PyObject
*
)
res
;
Overflow:
PyErr_SetString
(
PyExc_OverflowError
,
"join() result is too long for a Python string"
);
Py_DECREF
(
item
);
/* fall through */
onError:
Py_XDECREF
(
internal_separator
);
Py_DECREF
(
fseq
);
Py_XDECREF
(
res
);
return
NULL
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment