Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
1448d471
Kaydet (Commit)
1448d471
authored
Nis 25, 2003
tarafından
Skip Montanaro
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
rework Sniffer api significantly
üst
48816c6f
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
26 additions
and
41 deletions
+26
-41
csv.py
Lib/csv.py
+26
-41
No files found.
Lib/csv.py
Dosyayı görüntüle @
1448d471
...
@@ -9,6 +9,11 @@ from _csv import Error, __version__, writer, reader, register_dialect, \
...
@@ -9,6 +9,11 @@ from _csv import Error, __version__, writer, reader, register_dialect, \
QUOTE_MINIMAL
,
QUOTE_ALL
,
QUOTE_NONNUMERIC
,
QUOTE_NONE
,
\
QUOTE_MINIMAL
,
QUOTE_ALL
,
QUOTE_NONNUMERIC
,
QUOTE_NONE
,
\
__doc__
__doc__
try
:
from
cStringIO
import
StringIO
except
ImportError
:
from
StringIO
import
StringIO
__all__
=
[
"QUOTE_MINIMAL"
,
"QUOTE_ALL"
,
"QUOTE_NONNUMERIC"
,
"QUOTE_NONE"
,
__all__
=
[
"QUOTE_MINIMAL"
,
"QUOTE_ALL"
,
"QUOTE_NONNUMERIC"
,
"QUOTE_NONE"
,
"Error"
,
"Dialect"
,
"excel"
,
"excel_tab"
,
"reader"
,
"writer"
,
"Error"
,
"Dialect"
,
"excel"
,
"excel_tab"
,
"reader"
,
"writer"
,
"register_dialect"
,
"get_dialect"
,
"list_dialects"
,
"Sniffer"
,
"register_dialect"
,
"get_dialect"
,
"list_dialects"
,
"Sniffer"
,
...
@@ -147,52 +152,39 @@ class DictWriter:
...
@@ -147,52 +152,39 @@ class DictWriter:
class
Sniffer
:
class
Sniffer
:
'''
'''
"Sniffs" the format of a CSV file (i.e. delimiter, quotechar)
"Sniffs" the format of a CSV file (i.e. delimiter, quotechar)
Returns a
csv.
Dialect object.
Returns a Dialect object.
'''
'''
def
__init__
(
self
,
sample
=
16
*
1024
):
def
__init__
(
self
):
# in case there is more than one possible delimiter
# in case there is more than one possible delimiter
self
.
preferred
=
[
','
,
'
\t
'
,
';'
,
' '
,
':'
]
self
.
preferred
=
[
','
,
'
\t
'
,
';'
,
' '
,
':'
]
# amount of data (in bytes) to sample
self
.
sample
=
sample
def
sniff
(
self
,
sample
):
def
sniff
(
self
,
fileobj
):
"""
"""
Takes a file-like object and returns a dialect (or None)
Returns a dialect (or None) corresponding to the sample
"""
"""
self
.
fileobj
=
fileobj
data
=
fileobj
.
read
(
self
.
sample
)
quotechar
,
delimiter
,
skipinitialspace
=
\
quotechar
,
delimiter
,
skipinitialspace
=
\
self
.
_guess
QuoteAndDelimiter
(
data
)
self
.
_guess
_quote_and_delimiter
(
sample
)
if
delimiter
is
None
:
if
delimiter
is
None
:
delimiter
,
skipinitialspace
=
self
.
_guess
Delimiter
(
data
)
delimiter
,
skipinitialspace
=
self
.
_guess
_delimiter
(
sample
)
class
SniffedD
ialect
(
Dialect
):
class
d
ialect
(
Dialect
):
_name
=
"sniffed"
_name
=
"sniffed"
lineterminator
=
'
\r\n
'
lineterminator
=
'
\r\n
'
quoting
=
QUOTE_MINIMAL
quoting
=
QUOTE_MINIMAL
# escapechar = ''
# escapechar = ''
doublequote
=
False
doublequote
=
False
SniffedDialect
.
delimiter
=
delimiter
SniffedDialect
.
quotechar
=
quotechar
SniffedDialect
.
skipinitialspace
=
skipinitialspace
self
.
dialect
=
SniffedDialect
dialect
.
delimiter
=
delimiter
return
self
.
dialect
# _csv.reader won't accept a quotechar of ''
dialect
.
quotechar
=
quotechar
or
'"'
dialect
.
skipinitialspace
=
skipinitialspace
return
dialect
def
hasHeaders
(
self
):
return
self
.
_hasHeaders
(
self
.
fileobj
,
self
.
dialect
)
def
_guess_quote_and_delimiter
(
self
,
data
):
def
register_dialect
(
self
,
name
=
'sniffed'
):
register_dialect
(
name
,
self
.
dialect
)
def
_guessQuoteAndDelimiter
(
self
,
data
):
"""
"""
Looks for text enclosed between two identical quotes
Looks for text enclosed between two identical quotes
(the probable quotechar) which are preceded and followed
(the probable quotechar) which are preceded and followed
...
@@ -256,7 +248,7 @@ class Sniffer:
...
@@ -256,7 +248,7 @@ class Sniffer:
return
(
quotechar
,
delim
,
skipinitialspace
)
return
(
quotechar
,
delim
,
skipinitialspace
)
def
_guess
D
elimiter
(
self
,
data
):
def
_guess
_d
elimiter
(
self
,
data
):
"""
"""
The delimiter /should/ occur the same number of times on
The delimiter /should/ occur the same number of times on
each row. However, due to malformed data, it may not. We don't want
each row. However, due to malformed data, it may not. We don't want
...
@@ -290,12 +282,12 @@ class Sniffer:
...
@@ -290,12 +282,12 @@ class Sniffer:
iteration
+=
1
iteration
+=
1
for
line
in
data
[
start
:
end
]:
for
line
in
data
[
start
:
end
]:
for
char
in
ascii
:
for
char
in
ascii
:
meta
f
requency
=
charFrequency
.
get
(
char
,
{})
meta
F
requency
=
charFrequency
.
get
(
char
,
{})
# must count even if frequency is 0
# must count even if frequency is 0
freq
=
line
.
strip
()
.
count
(
char
)
freq
=
line
.
strip
()
.
count
(
char
)
# value is the mode
# value is the mode
meta
frequency
[
freq
]
=
metaf
requency
.
get
(
freq
,
0
)
+
1
meta
Frequency
[
freq
]
=
metaF
requency
.
get
(
freq
,
0
)
+
1
charFrequency
[
char
]
=
meta
f
requency
charFrequency
[
char
]
=
meta
F
requency
for
char
in
charFrequency
.
keys
():
for
char
in
charFrequency
.
keys
():
items
=
charFrequency
[
char
]
.
items
()
items
=
charFrequency
[
char
]
.
items
()
...
@@ -356,7 +348,7 @@ class Sniffer:
...
@@ -356,7 +348,7 @@ class Sniffer:
return
(
delim
,
skipinitialspace
)
return
(
delim
,
skipinitialspace
)
def
_hasHeaders
(
self
,
fileobj
,
dialect
):
def
has_header
(
self
,
sample
):
# Creates a dictionary of types of data in each column. If any
# Creates a dictionary of types of data in each column. If any
# column is of a single type (say, integers), *except* for the first
# column is of a single type (say, integers), *except* for the first
# row, then the first row is presumed to be labels. If the type
# row, then the first row is presumed to be labels. If the type
...
@@ -373,23 +365,16 @@ class Sniffer:
...
@@ -373,23 +365,16 @@ class Sniffer:
"""
"""
return
eval
(
item
.
replace
(
'('
,
''
)
.
replace
(
')'
,
''
))
return
eval
(
item
.
replace
(
'('
,
''
)
.
replace
(
')'
,
''
))
# rewind the fileobj - this might not work for some file-like
rdr
=
reader
(
StringIO
(
sample
),
self
.
sniff
(
sample
))
# objects...
fileobj
.
seek
(
0
)
r
=
csv
.
reader
(
fileobj
,
delimiter
=
dialect
.
delimiter
,
quotechar
=
dialect
.
quotechar
,
skipinitialspace
=
dialect
.
skipinitialspace
)
header
=
r
.
next
()
# assume first row is header
header
=
r
dr
.
next
()
# assume first row is header
columns
=
len
(
header
)
columns
=
len
(
header
)
columnTypes
=
{}
columnTypes
=
{}
for
i
in
range
(
columns
):
columnTypes
[
i
]
=
None
for
i
in
range
(
columns
):
columnTypes
[
i
]
=
None
checked
=
0
checked
=
0
for
row
in
r
:
for
row
in
r
dr
:
# arbitrary number of rows to check, to keep it sane
# arbitrary number of rows to check, to keep it sane
if
checked
>
20
:
if
checked
>
20
:
break
break
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment