Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
c0c00c38
Kaydet (Commit)
c0c00c38
authored
Ara 01, 2015
tarafından
Steven D'Aprano
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Fix for issue #25177 with the mean of very small and very large numbers.
üst
a8d83f5f
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
114 additions
and
67 deletions
+114
-67
statistics.py
Lib/statistics.py
+110
-67
test_statistics.py
Lib/test/test_statistics.py
+0
-0
NEWS
Misc/NEWS
+4
-0
No files found.
Lib/statistics.py
Dosyayı görüntüle @
c0c00c38
...
@@ -104,6 +104,8 @@ import math
...
@@ -104,6 +104,8 @@ import math
from
fractions
import
Fraction
from
fractions
import
Fraction
from
decimal
import
Decimal
from
decimal
import
Decimal
from
itertools
import
groupby
# === Exceptions ===
# === Exceptions ===
...
@@ -115,86 +117,102 @@ class StatisticsError(ValueError):
...
@@ -115,86 +117,102 @@ class StatisticsError(ValueError):
# === Private utilities ===
# === Private utilities ===
def
_sum
(
data
,
start
=
0
):
def
_sum
(
data
,
start
=
0
):
"""_sum(data [, start]) -> value
"""_sum(data [, start]) -> (type, sum, count)
Return a high-precision sum of the given numeric data as a fraction,
together with the type to be converted to and the count of items.
Return a high-precision sum of the given numeric data. If optional
If optional argument ``start`` is given, it is added to the total.
argument ``start`` is given, it is added to the total. If ``data`` is
If ``data`` is empty, ``start`` (defaulting to 0) is returned.
empty, ``start`` (defaulting to 0) is returned.
Examples
Examples
--------
--------
>>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75)
>>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75)
11.0
(<class 'float'>, Fraction(11, 1), 5)
Some sources of round-off error will be avoided:
Some sources of round-off error will be avoided:
>>> _sum([1e50, 1, -1e50] * 1000) # Built-in sum returns zero.
>>> _sum([1e50, 1, -1e50] * 1000) # Built-in sum returns zero.
1000.0
(<class 'float'>, Fraction(1000, 1), 3000)
Fractions and Decimals are also supported:
Fractions and Decimals are also supported:
>>> from fractions import Fraction as F
>>> from fractions import Fraction as F
>>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)])
>>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)])
Fraction(63, 20)
(<class 'fractions.Fraction'>, Fraction(63, 20), 4)
>>> from decimal import Decimal as D
>>> from decimal import Decimal as D
>>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")]
>>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")]
>>> _sum(data)
>>> _sum(data)
Decimal('0.6963')
(<class 'decimal.Decimal'>, Fraction(6963, 10000), 4)
Mixed types are currently treated as an error, except that int is
Mixed types are currently treated as an error, except that int is
allowed.
allowed.
"""
"""
# We fail as soon as we reach a value that is not an int or the type of
count
=
0
# the first value which is not an int. E.g. _sum([int, int, float, int])
# is okay, but sum([int, int, float, Fraction]) is not.
allowed_types
=
set
([
int
,
type
(
start
)])
n
,
d
=
_exact_ratio
(
start
)
n
,
d
=
_exact_ratio
(
start
)
partials
=
{
d
:
n
}
# map {denominator: sum of numerators}
partials
=
{
d
:
n
}
# Micro-optimizations.
exact_ratio
=
_exact_ratio
partials_get
=
partials
.
get
partials_get
=
partials
.
get
# Add numerators for each denominator.
T
=
_coerce
(
int
,
type
(
start
))
for
x
in
data
:
for
typ
,
values
in
groupby
(
data
,
type
):
_check_type
(
type
(
x
),
allowed_types
)
T
=
_coerce
(
T
,
typ
)
# or raise TypeError
n
,
d
=
exact_ratio
(
x
)
for
n
,
d
in
map
(
_exact_ratio
,
values
):
count
+=
1
partials
[
d
]
=
partials_get
(
d
,
0
)
+
n
partials
[
d
]
=
partials_get
(
d
,
0
)
+
n
# Find the expected result type. If allowed_types has only one item, it
# will be int; if it has two, use the one which isn't int.
assert
len
(
allowed_types
)
in
(
1
,
2
)
if
len
(
allowed_types
)
==
1
:
assert
allowed_types
.
pop
()
is
int
T
=
int
else
:
T
=
(
allowed_types
-
set
([
int
]))
.
pop
()
if
None
in
partials
:
if
None
in
partials
:
assert
issubclass
(
T
,
(
float
,
Decimal
))
# The sum will be a NAN or INF. We can ignore all the finite
assert
not
math
.
isfinite
(
partials
[
None
])
# partials, and just look at this special one.
return
T
(
partials
[
None
])
total
=
partials
[
None
]
total
=
Fraction
()
assert
not
_isfinite
(
total
)
for
d
,
n
in
sorted
(
partials
.
items
()):
else
:
total
+=
Fraction
(
n
,
d
)
# Sum all the partial sums using builtin sum.
if
issubclass
(
T
,
int
):
# FIXME is this faster if we sum them in order of the denominator?
assert
total
.
denominator
==
1
total
=
sum
(
Fraction
(
n
,
d
)
for
d
,
n
in
sorted
(
partials
.
items
()))
return
T
(
total
.
numerator
)
return
(
T
,
total
,
count
)
if
issubclass
(
T
,
Decimal
):
return
T
(
total
.
numerator
)
/
total
.
denominator
return
T
(
total
)
def
_check_type
(
T
,
allowed
):
def
_isfinite
(
x
):
if
T
not
in
allowed
:
try
:
if
len
(
allowed
)
==
1
:
return
x
.
is_finite
()
# Likely a Decimal.
allowed
.
add
(
T
)
except
AttributeError
:
else
:
return
math
.
isfinite
(
x
)
# Coerces to float first.
types
=
', '
.
join
([
t
.
__name__
for
t
in
allowed
]
+
[
T
.
__name__
])
raise
TypeError
(
"unsupported mixed types: %s"
%
types
)
def
_coerce
(
T
,
S
):
"""Coerce types T and S to a common type, or raise TypeError.
Coercion rules are currently an implementation detail. See the CoerceTest
test class in test_statistics for details.
"""
# See http://bugs.python.org/issue24068.
assert
T
is
not
bool
,
"initial type T is bool"
# If the types are the same, no need to coerce anything. Put this
# first, so that the usual case (no coercion needed) happens as soon
# as possible.
if
T
is
S
:
return
T
# Mixed int & other coerce to the other type.
if
S
is
int
or
S
is
bool
:
return
T
if
T
is
int
:
return
S
# If one is a (strict) subclass of the other, coerce to the subclass.
if
issubclass
(
S
,
T
):
return
S
if
issubclass
(
T
,
S
):
return
T
# Ints coerce to the other type.
if
issubclass
(
T
,
int
):
return
S
if
issubclass
(
S
,
int
):
return
T
# Mixed fraction & float coerces to float (or float subclass).
if
issubclass
(
T
,
Fraction
)
and
issubclass
(
S
,
float
):
return
S
if
issubclass
(
T
,
float
)
and
issubclass
(
S
,
Fraction
):
return
T
# Any other combination is disallowed.
msg
=
"don't know how to coerce %s and %s"
raise
TypeError
(
msg
%
(
T
.
__name__
,
S
.
__name__
))
def
_exact_ratio
(
x
):
def
_exact_ratio
(
x
):
"""
Convert Real number x exactly to
(numerator, denominator) pair.
"""
Return Real number x to exact
(numerator, denominator) pair.
>>> _exact_ratio(0.25)
>>> _exact_ratio(0.25)
(1, 4)
(1, 4)
...
@@ -202,29 +220,31 @@ def _exact_ratio(x):
...
@@ -202,29 +220,31 @@ def _exact_ratio(x):
x is expected to be an int, Fraction, Decimal or float.
x is expected to be an int, Fraction, Decimal or float.
"""
"""
try
:
try
:
# Optimise the common case of floats. We expect that the most often
# used numeric type will be builtin floats, so try to make this as
# fast as possible.
if
type
(
x
)
is
float
:
return
x
.
as_integer_ratio
()
try
:
try
:
# int, Fraction
# x may be an int, Fraction, or Integral ABC.
return
(
x
.
numerator
,
x
.
denominator
)
return
(
x
.
numerator
,
x
.
denominator
)
except
AttributeError
:
except
AttributeError
:
# float
try
:
try
:
# x may be a float subclass.
return
x
.
as_integer_ratio
()
return
x
.
as_integer_ratio
()
except
AttributeError
:
except
AttributeError
:
# Decimal
try
:
try
:
# x may be a Decimal.
return
_decimal_to_ratio
(
x
)
return
_decimal_to_ratio
(
x
)
except
AttributeError
:
except
AttributeError
:
msg
=
"can't convert type '{}' to numerator/denominator"
# Just give up?
raise
TypeError
(
msg
.
format
(
type
(
x
)
.
__name__
))
from
None
pass
except
(
OverflowError
,
ValueError
):
except
(
OverflowError
,
ValueError
):
# INF or NAN
# float NAN or INF.
if
__debug__
:
# Decimal signalling NANs cannot be converted to float :-(
if
isinstance
(
x
,
Decimal
):
assert
not
x
.
is_finite
()
else
:
assert
not
math
.
isfinite
(
x
)
assert
not
math
.
isfinite
(
x
)
return
(
x
,
None
)
return
(
x
,
None
)
msg
=
"can't convert type '{}' to numerator/denominator"
raise
TypeError
(
msg
.
format
(
type
(
x
)
.
__name__
))
# FIXME This is faster than Fraction.from_decimal, but still too slow.
# FIXME This is faster than Fraction.from_decimal, but still too slow.
...
@@ -239,7 +259,7 @@ def _decimal_to_ratio(d):
...
@@ -239,7 +259,7 @@ def _decimal_to_ratio(d):
sign
,
digits
,
exp
=
d
.
as_tuple
()
sign
,
digits
,
exp
=
d
.
as_tuple
()
if
exp
in
(
'F'
,
'n'
,
'N'
):
# INF, NAN, sNAN
if
exp
in
(
'F'
,
'n'
,
'N'
):
# INF, NAN, sNAN
assert
not
d
.
is_finite
()
assert
not
d
.
is_finite
()
raise
ValueError
return
(
d
,
None
)
num
=
0
num
=
0
for
digit
in
digits
:
for
digit
in
digits
:
num
=
num
*
10
+
digit
num
=
num
*
10
+
digit
...
@@ -253,6 +273,24 @@ def _decimal_to_ratio(d):
...
@@ -253,6 +273,24 @@ def _decimal_to_ratio(d):
return
(
num
,
den
)
return
(
num
,
den
)
def
_convert
(
value
,
T
):
"""Convert value to given numeric type T."""
if
type
(
value
)
is
T
:
# This covers the cases where T is Fraction, or where value is
# a NAN or INF (Decimal or float).
return
value
if
issubclass
(
T
,
int
)
and
value
.
denominator
!=
1
:
T
=
float
try
:
# FIXME: what do we do if this overflows?
return
T
(
value
)
except
TypeError
:
if
issubclass
(
T
,
Decimal
):
return
T
(
value
.
numerator
)
/
T
(
value
.
denominator
)
else
:
raise
def
_counts
(
data
):
def
_counts
(
data
):
# Generate a table of sorted (value, frequency) pairs.
# Generate a table of sorted (value, frequency) pairs.
table
=
collections
.
Counter
(
iter
(
data
))
.
most_common
()
table
=
collections
.
Counter
(
iter
(
data
))
.
most_common
()
...
@@ -290,7 +328,9 @@ def mean(data):
...
@@ -290,7 +328,9 @@ def mean(data):
n
=
len
(
data
)
n
=
len
(
data
)
if
n
<
1
:
if
n
<
1
:
raise
StatisticsError
(
'mean requires at least one data point'
)
raise
StatisticsError
(
'mean requires at least one data point'
)
return
_sum
(
data
)
/
n
T
,
total
,
count
=
_sum
(
data
)
assert
count
==
n
return
_convert
(
total
/
n
,
T
)
# FIXME: investigate ways to calculate medians without sorting? Quickselect?
# FIXME: investigate ways to calculate medians without sorting? Quickselect?
...
@@ -460,12 +500,14 @@ def _ss(data, c=None):
...
@@ -460,12 +500,14 @@ def _ss(data, c=None):
"""
"""
if
c
is
None
:
if
c
is
None
:
c
=
mean
(
data
)
c
=
mean
(
data
)
ss
=
_sum
((
x
-
c
)
**
2
for
x
in
data
)
T
,
total
,
count
=
_sum
((
x
-
c
)
**
2
for
x
in
data
)
# The following sum should mathematically equal zero, but due to rounding
# The following sum should mathematically equal zero, but due to rounding
# error may not.
# error may not.
ss
-=
_sum
((
x
-
c
)
for
x
in
data
)
**
2
/
len
(
data
)
U
,
total2
,
count2
=
_sum
((
x
-
c
)
for
x
in
data
)
assert
not
ss
<
0
,
'negative sum of square deviations: %f'
%
ss
assert
T
==
U
and
count
==
count2
return
ss
total
-=
total2
**
2
/
len
(
data
)
assert
not
total
<
0
,
'negative sum of square deviations: %f'
%
total
return
(
T
,
total
)
def
variance
(
data
,
xbar
=
None
):
def
variance
(
data
,
xbar
=
None
):
...
@@ -511,8 +553,8 @@ def variance(data, xbar=None):
...
@@ -511,8 +553,8 @@ def variance(data, xbar=None):
n
=
len
(
data
)
n
=
len
(
data
)
if
n
<
2
:
if
n
<
2
:
raise
StatisticsError
(
'variance requires at least two data points'
)
raise
StatisticsError
(
'variance requires at least two data points'
)
ss
=
_ss
(
data
,
xbar
)
T
,
ss
=
_ss
(
data
,
xbar
)
return
ss
/
(
n
-
1
)
return
_convert
(
ss
/
(
n
-
1
),
T
)
def
pvariance
(
data
,
mu
=
None
):
def
pvariance
(
data
,
mu
=
None
):
...
@@ -560,7 +602,8 @@ def pvariance(data, mu=None):
...
@@ -560,7 +602,8 @@ def pvariance(data, mu=None):
if
n
<
1
:
if
n
<
1
:
raise
StatisticsError
(
'pvariance requires at least one data point'
)
raise
StatisticsError
(
'pvariance requires at least one data point'
)
ss
=
_ss
(
data
,
mu
)
ss
=
_ss
(
data
,
mu
)
return
ss
/
n
T
,
ss
=
_ss
(
data
,
mu
)
return
_convert
(
ss
/
n
,
T
)
def
stdev
(
data
,
xbar
=
None
):
def
stdev
(
data
,
xbar
=
None
):
...
...
Lib/test/test_statistics.py
Dosyayı görüntüle @
c0c00c38
This diff is collapsed.
Click to expand it.
Misc/NEWS
Dosyayı görüntüle @
c0c00c38
...
@@ -113,6 +113,10 @@ Core and Builtins
...
@@ -113,6 +113,10 @@ Core and Builtins
Library
Library
-------
-------
- Issue #25177: Fixed problem with the mean of very small and very large
numbers. As a side effect, statistics.mean and statistics.variance should
be significantly faster.
- Issue #25718: Fixed copying object with state with boolean value is false.
- Issue #25718: Fixed copying object with state with boolean value is false.
- Issue #10131: Fixed deep copying of minidom documents. Based on patch
- Issue #10131: Fixed deep copying of minidom documents. Based on patch
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment