Commit 39b29be8, authored by Skip Montanaro

Fix a delimiter detection problem in sniffer. Sniffing "a|b|c\r\n" was
returning 'a' as the delimiter.  It now returns '|', but not because I
understood better what the code was supposed to do.  Would someone who
understands the idea behind _guess_delimiter() (see its doc string) look to
see if my fallback choice is better than before or if it's just serendipity
that I picked the proper delimiter?
parent 0174dddc
...@@ -152,10 +152,13 @@ class Sniffer: ...@@ -152,10 +152,13 @@ class Sniffer:
quotechar, delimiter, skipinitialspace = \ quotechar, delimiter, skipinitialspace = \
self._guess_quote_and_delimiter(sample, delimiters) self._guess_quote_and_delimiter(sample, delimiters)
if delimiter is None: if not delimiter:
delimiter, skipinitialspace = self._guess_delimiter(sample, delimiter, skipinitialspace = self._guess_delimiter(sample,
delimiters) delimiters)
if not delimiter:
raise Error, "Could not determine delimiter"
class dialect(Dialect): class dialect(Dialect):
_name = "sniffed" _name = "sniffed"
lineterminator = '\r\n' lineterminator = '\r\n'
...@@ -329,8 +332,12 @@ class Sniffer: ...@@ -329,8 +332,12 @@ class Sniffer:
data[0].count("%c " % d)) data[0].count("%c " % d))
return (d, skipinitialspace) return (d, skipinitialspace)
# finally, just return the first damn character in the list # nothing else indicates a preference, pick the character that
delim = delims.keys()[0] # dominates(?)
items = [(v,k) for (k,v) in delims.items()]
items.sort()
delim = items[-1][1]
skipinitialspace = (data[0].count(delim) == skipinitialspace = (data[0].count(delim) ==
data[0].count("%c " % delim)) data[0].count("%c " % delim))
return (delim, skipinitialspace) return (delim, skipinitialspace)
......
...@@ -852,6 +852,8 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back ...@@ -852,6 +852,8 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
''' '''
sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n" sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n"
sample6 = "a|b|c\r\nd|e|f\r\n"
sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n"
def test_has_header(self): def test_has_header(self):
sniffer = csv.Sniffer() sniffer = csv.Sniffer()
...@@ -882,6 +884,11 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back ...@@ -882,6 +884,11 @@ Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
self.assertEqual(dialect.delimiter, ";") self.assertEqual(dialect.delimiter, ";")
dialect = sniffer.sniff(self.sample5) dialect = sniffer.sniff(self.sample5)
self.assertEqual(dialect.delimiter, "\t") self.assertEqual(dialect.delimiter, "\t")
dialect = sniffer.sniff(self.sample6)
self.assertEqual(dialect.delimiter, "|")
dialect = sniffer.sniff(self.sample7)
self.assertEqual(dialect.delimiter, "|")
self.assertEqual(dialect.quotechar, "'")
if not hasattr(sys, "gettotalrefcount"): if not hasattr(sys, "gettotalrefcount"):
if test_support.verbose: print "*** skipping leakage tests ***" if test_support.verbose: print "*** skipping leakage tests ***"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment