cookies.py 20.2 KB
Newer Older
1 2 3 4 5
#!/usr/bin/env python
#

####
# Copyright 2000 by Timothy O'Malley <timo@alum.mit.edu>
Tim Peters's avatar
Tim Peters committed
6
#
7
#                All Rights Reserved
Tim Peters's avatar
Tim Peters committed
8
#
9 10 11 12 13 14 15
# Permission to use, copy, modify, and distribute this software
# and its documentation for any purpose and without fee is hereby
# granted, provided that the above copyright notice appear in all
# copies and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Timothy O'Malley  not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
Tim Peters's avatar
Tim Peters committed
16 17
# prior permission.
#
18 19 20 21 22 23 24
# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
Tim Peters's avatar
Tim Peters committed
25
# PERFORMANCE OF THIS SOFTWARE.
26 27
#
####
Tim Peters's avatar
Tim Peters committed
28 29
#
# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp
30 31 32 33 34 35 36
#   by Timothy O'Malley <timo@alum.mit.edu>
#
#  Cookie.py is a Python module for the handling of HTTP
#  cookies as a Python dictionary.  See RFC 2109 for more
#  information on cookies.
#
#  The original idea to treat Cookies as a dictionary came from
37
#  Dave Mitchell (davem@magnet.com) in 1995, when he released the
38 39 40 41
#  first version of nscookie.py.
#
####

42
r"""
43 44 45 46 47 48 49 50
Here's a sample session to show how to use this module.
At the moment, this is the only documentation.

The Basics
----------

Importing is easy..

51
   >>> from http import cookies
52

53
Most of the time you start by creating a cookie.
54

55
   >>> C = cookies.SimpleCookie()
56 57 58 59

Once you've created your Cookie, you can add values just as if it were
a dictionary.

60
   >>> C = cookies.SimpleCookie()
61 62
   >>> C["fig"] = "newton"
   >>> C["sugar"] = "wafer"
63 64
   >>> C.output()
   'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer'
65 66 67 68

Notice that the printable representation of a Cookie is the
appropriate format for a Set-Cookie: header.  This is the
default behavior.  You can change the header and printed
69
attributes by using the .output() function
70

71
   >>> C = cookies.SimpleCookie()
72 73
   >>> C["rocky"] = "road"
   >>> C["rocky"]["path"] = "/cookie"
74
   >>> print(C.output(header="Cookie:"))
75
   Cookie: rocky=road; Path=/cookie
76
   >>> print(C.output(attrs=[], header="Cookie:"))
77
   Cookie: rocky=road
78 79 80 81 82

The load() method of a Cookie extracts cookies from a string.  In a
CGI script, you would use this method to extract the cookies from the
HTTP_COOKIE environment variable.

83
   >>> C = cookies.SimpleCookie()
84
   >>> C.load("chips=ahoy; vienna=finger")
85 86
   >>> C.output()
   'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger'
87 88 89 90 91

The load() method is darn-tootin smart about identifying cookies
within a string.  Escaped quotation marks, nested semicolons, and other
such trickeries do not confuse it.

92
   >>> C = cookies.SimpleCookie()
93
   >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";')
94
   >>> print(C)
95
   Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;"
96 97 98 99 100

Each element of the Cookie also supports all of the RFC 2109
Cookie attributes.  Here's an example which sets the Path
attribute.

101
   >>> C = cookies.SimpleCookie()
102 103
   >>> C["oreo"] = "doublestuff"
   >>> C["oreo"]["path"] = "/"
104
   >>> print(C)
105
   Set-Cookie: oreo=doublestuff; Path=/
106 107

Each dictionary element has a 'value' attribute, which gives you
Tim Peters's avatar
Tim Peters committed
108
back the value associated with the key.
109

110
   >>> C = cookies.SimpleCookie()
111 112 113 114 115 116 117 118
   >>> C["twix"] = "none for you"
   >>> C["twix"].value
   'none for you'

The SimpleCookie expects that all values should be standard strings.
Just to be sure, SimpleCookie invokes the str() builtin to convert
the value to a string, when the values are set dictionary-style.

119
   >>> C = cookies.SimpleCookie()
120 121 122 123 124 125
   >>> C["number"] = 7
   >>> C["string"] = "seven"
   >>> C["number"].value
   '7'
   >>> C["string"].value
   'seven'
126 127
   >>> C.output()
   'Set-Cookie: number=7\r\nSet-Cookie: string=seven'
128 129 130 131 132 133 134 135

Finis.
"""  #"
#     ^
#     |----helps out font-lock

#
# Import our required modules
Tim Peters's avatar
Tim Peters committed
136
#
137
import string
138

139
from pickle import dumps, loads
140

141
import re, warnings
142

143
# Names exported by "from <module> import *": the exception type and the
# two cookie container classes.
__all__ = ["CookieError", "BaseCookie", "SimpleCookie"]
144

145
# Pre-bound join helpers used when assembling header strings:
#   _nulljoin       concatenates pieces with no separator
#   _semispacejoin  joins pieces with "; " (cookie attribute separator)
#   _spacejoin      joins pieces with a single space
_nulljoin = ''.join
_semispacejoin = '; '.join
_spacejoin = ' '.join
148

149 150 151 152 153 154 155 156 157 158 159 160 161
#
# Define an exception visible to External modules
#
class CookieError(Exception):
    """Error raised by this module for an invalid cookie key or attribute."""


# These quoting routines conform to the RFC2109 specification, which in
# turn references the character definitions from RFC2068.  They provide
# a two-way quoting algorithm.  Any non-text character is translated
# into a 4 character sequence: a backslash followed by the
# three-digit octal equivalent of the character.  Any '\' or '"' is
# quoted with a preceding '\' (backslash).
#
# These are taken from RFC2068 and RFC2109.
#       _LegalChars       is the list of chars which don't require "'s
#       _Translator       hash-table for fast quoting
#
167
# ASCII letters, digits and the listed punctuation: a string made up only
# of these characters is emitted as-is by _quote(), and a cookie key must
# consist solely of them (see Morsel.set()).
_LegalChars       = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~"
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
# Quoting table: maps each character that must be escaped inside a
# double-quoted cookie value to its escaped form.
#   * control characters \000-\037 and everything from \177 through \377
#     become a backslash followed by their three-digit octal code;
#   * the double quote and the backslash are simply backslash-escaped.
# Characters absent from the table are copied through unchanged by _quote().
_Translator = {chr(code): "\\%03o" % code for code in range(0o40)}
_Translator.update({'"': '\\"', '\\': '\\\\'})
_Translator.update(
    (chr(code), "\\%03o" % code) for code in range(0o177, 0o400)
)

228
def _quote(str, LegalChars=_LegalChars):
    """Return *str* in a form that is safe to use as a cookie value.

    If the string does not need to be double-quoted (every character is
    in *LegalChars*), it is returned unchanged.  Otherwise it is wrapped
    in double quotes and each character found in ``_Translator`` is
    replaced by its escaped form; all other characters are copied as-is.

    ``None`` is passed through unchanged, mirroring ``_unquote()``.
    """
    if str is None or all(c in LegalChars for c in str):
        return str
    # _Translator.get(c, c): escape the character if the table knows it,
    # otherwise keep the character itself.
    return '"' + _nulljoin(map(_Translator.get, str, str)) + '"'
# end _quote


# Kept at module level for backward compatibility; _unquote() itself now
# uses the single combined pattern below.
_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
_QuotePatt = re.compile(r"[\\].")

# One pattern for both escape forms: a backslash followed either by a
# three-digit octal code (group 1) or by any single character (group 2).
# The octal alternative comes first so that "\012" decodes as an octal
# escape rather than as an escaped "0".
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub


def _unquote_replace(m):
    """Return the single character encoded by one escape-sequence match."""
    octal = m.group(1)
    if octal:
        return chr(int(octal, 8))
    return m.group(2)


def _unquote(str):
    """Undo ``_quote()``: strip the double quotes and decode escapes.

    Strings that are not wrapped in double quotes (and ``None``) are
    returned unchanged: if there aren't any doublequotes, there can't be
    any special characters (see RFC 2109).  Inside the quotes:

        \\012 --> newline   (backslash + three octal digits)
        \\"   --> "         (backslash + any other character)

    Decoding is done in a single regular-expression pass.  Searching for
    each escape form separately from the current position makes every
    step rescan the rest of the string, which is quadratic on input made
    of many escapes.
    """
    if str is None or len(str) < 2:
        return str
    if str[0] != '"' or str[-1] != '"':
        return str

    # We have to assume that we must decode this string.
    # Remove the "s, then replace every escape sequence.
    str = str[1:-1]
    return _unquote_sub(_unquote_replace, str)
# end _unquote

# The _getdate() routine is used to set the expiration time in
# the cookie's HTTP header.  By default, _getdate() returns the
# current time in the appropriate "expires" format for a
# Set-Cookie header.  The one optional argument is an offset from
# now, in seconds.  For example, an offset of -3600 means "one hour ago".
# The offset may be a floating point number.
#

# Day names indexed by the weekday field of gmtime() (0 = Monday).
_weekdayname = "Mon Tue Wed Thu Fri Sat Sun".split()

# Month names indexed by the 1-based month field of gmtime(); slot 0 is a
# placeholder.
_monthname = [None] + "Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec".split()

def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname):
    """Return the GMT time *future* seconds from now as an "expires" string.

    future      -- offset in seconds added to the current time (may be
                   negative or fractional)
    weekdayname -- sequence of day names indexed by weekday number
    monthname   -- sequence of month names indexed by month number
    """
    from time import gmtime, time
    stamp = gmtime(time() + future)
    fields = (
        weekdayname[stamp.tm_wday], stamp.tm_mday, monthname[stamp.tm_mon],
        stamp.tm_year, stamp.tm_hour, stamp.tm_min, stamp.tm_sec,
    )
    return "%s, %02d-%3s-%4d %02d:%02d:%02d GMT" % fields


#
# A class to hold ONE key,value pair.
# In a cookie, each such pair may have several attributes.
#       so this class is used to keep the attributes associated
#       with the appropriate key,value pair.
# This class also includes a coded_value attribute, which
#       is used to hold the network representation of the
#       value.  This is most useful when Python objects are
#       pickled for network transit.
#

320
class Morsel(dict):
    """Hold ONE cookie key/value pair together with its attributes.

    The dictionary part stores the reserved attributes (keyed by their
    lowercase name, each initialised to the empty string).  Three plain
    instance attributes carry the pair itself:

        key          -- the cookie name
        value        -- the real (decoded) value
        coded_value  -- the network representation of the value
    """
    # RFC 2109 lists these attributes as reserved:
    #   path       comment         domain
    #   max-age    secure      version
    #
    # For historical reasons, these attributes are also reserved:
    #   expires
    #
    # This is an extension from Microsoft:
    #   httponly
    #
    # This dictionary provides a mapping from the lowercase
    # variant on the left to the appropriate traditional
    # formatting on the right.
    _reserved = {
        "expires": "expires",
        "path": "Path",
        "comment": "Comment",
        "domain": "Domain",
        "max-age": "Max-Age",
        "secure": "secure",
        "httponly": "httponly",
        "version": "Version",
    }

    def __init__(self):
        """Create an empty morsel: no key/value, all attributes set to ""."""
        # Set defaults
        self.key = self.value = self.coded_value = None

        # Set default attributes.  dict.__setitem__ is used directly so
        # the lowercase/validation step of our own __setitem__ is skipped.
        for K in self._reserved:
            dict.__setitem__(self, K, "")
    # end __init__

    def __setitem__(self, K, V):
        """Set attribute *K* (case-insensitive) to *V*.

        Raises CookieError if *K* is not one of the reserved attributes.
        """
        K = K.lower()
        if K not in self._reserved:
            raise CookieError("Invalid Attribute %s" % K)
        dict.__setitem__(self, K, V)
    # end __setitem__

    def isReservedKey(self, K):
        """Return True if *K* (case-insensitive) is a reserved attribute."""
        return K.lower() in self._reserved
    # end isReservedKey

    def set(self, key, val, coded_val, LegalChars=_LegalChars):
        """Store the cookie's name, real value and coded value.

        Raises CookieError if *key* is a reserved attribute name or
        contains a character that is not in *LegalChars*.
        """
        # First we verify that the key isn't a reserved word
        # Second we make sure it only contains legal characters
        if key.lower() in self._reserved:
            raise CookieError("Attempt to set a reserved key: %s" % key)
        if any(c not in LegalChars for c in key):
            raise CookieError("Illegal key value: %s" % key)

        # It's a good key, so save it.
        self.key = key
        self.value = val
        self.coded_value = coded_val
    # end set

    def output(self, attrs=None, header="Set-Cookie:"):
        """Return *header*, a space, and the OutputString() of this morsel."""
        return "%s %s" % (header, self.OutputString(attrs))

    __str__ = output

    def __repr__(self):
        return '<%s: %s=%s>' % (self.__class__.__name__,
                                self.key, repr(self.value))

    def js_output(self, attrs=None):
        """Return an HTML <script> snippet that sets this cookie.

        Double quotes in the cookie string are backslash-escaped so the
        string can sit inside the JavaScript string literal.
        """
        # Print javascript
        return """
        <script type="text/javascript">
        <!-- begin hiding
        document.cookie = \"%s\";
        // end hiding -->
        </script>
        """ % ( self.OutputString(attrs).replace('"',r'\"'))
    # end js_output()

    def OutputString(self, attrs=None):
        """Return "key=coded_value" followed by the selected attributes.

        attrs -- container of lowercase attribute names to include; None
                 (the default) means every reserved attribute.

        Attributes whose value is "" are skipped.  Pieces are joined with
        "; " and attributes appear in sorted order of their names.
        """
        # Build up our result
        result = []
        RA = result.append

        # First, the key=value pair
        RA("%s=%s" % (self.key, self.coded_value))

        # Now add any defined attributes
        if attrs is None:
            attrs = self._reserved
        for K, V in sorted(self.items()):
            if V == "":
                continue
            if K not in attrs:
                continue
            # isinstance (rather than an exact type comparison) so that
            # int subclasses are treated as numbers too.
            if K == "expires" and isinstance(V, int):
                # An integer "expires" is an offset in seconds from now.
                RA("%s=%s" % (self._reserved[K], _getdate(V)))
            elif K == "max-age" and isinstance(V, int):
                RA("%s=%d" % (self._reserved[K], V))
            elif K == "secure":
                # Flag attribute: emitted bare, without "=value".
                RA(str(self._reserved[K]))
            elif K == "httponly":
                # Flag attribute: emitted bare, without "=value".
                RA(str(self._reserved[K]))
            else:
                RA("%s=%s" % (self._reserved[K], V))

        # Return the result
        return _semispacejoin(result)
    # end OutputString
# end Morsel class



#
# Pattern for finding cookie
#
# This used to be strict parsing based on the RFC2109 and RFC2068
# specifications.  I have since discovered that MSIE 3.0x doesn't
# follow the character rules outlined in those specs.  As a
# result, the parsing rules here are less strict.
#

441
# Character class accepted in cookie keys and in unquoted cookie values.
_LegalCharsPatt  = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]"

# Matches one "key=value" pair, optionally followed by a semicolon.  The
# value is either a double-quoted string (backslash escapes allowed) or a
# possibly empty run of legal characters.
_CookiePattern = re.compile(
    r"(?x)"                       # This is a Verbose pattern
    r"(?P<key>"                   # Start of group 'key'
    ""+ _LegalCharsPatt +"+?"     # Any word of at least one letter, nongreedy
    r")"                          # End of group 'key'
    r"\s*=\s*"                    # Equal Sign
    r"(?P<val>"                   # Start of group 'val'
    r'"(?:[^\\"]|\\.)*"'            # Any doublequoted string
    r"|"                            # or
    ""+ _LegalCharsPatt +"*"        # Any word or empty string
    r")"                          # End of group 'val'
    r"\s*;?"                      # Probably ending in a semi-colon
    , re.ASCII)                   # May be removed if safe.
455 456 457 458 459 460


# At long last, here is the cookie class.
#   Using this class is almost just like using a dictionary.
# See this module's docstring for example usage.
#
461
class BaseCookie(dict):
    """A container class for a set of Morsels.

    Maps each cookie name to the Morsel holding its value and attributes.
    Subclasses customise how values are converted by overriding
    value_decode() and value_encode().
    """

    def value_decode(self, val):
        """real_value, coded_value = value_decode(STRING)
        Called prior to setting a cookie's value from the network
        representation.  The VALUE is the value read from HTTP
        header.
        Override this function to modify the behavior of cookies.
        """
        return val, val
    # end value_decode

    def value_encode(self, val):
        """real_value, coded_value = value_encode(VALUE)
        Called prior to setting a cookie's value from the dictionary
        representation.  The VALUE is the value being assigned.
        Override this function to modify the behavior of cookies.
        """
        strval = str(val)
        return strval, strval
    # end value_encode

    def __init__(self, input=None):
        """Create the cookie, loading *input* (see load()) if it is truthy."""
        if input:
            self.load(input)
    # end __init__

    def __set(self, key, real_value, coded_value):
        """Private method for setting a cookie's value"""
        # Reuse the existing Morsel for this key so that attributes
        # already set on it are kept; otherwise start with a fresh one.
        M = self.get(key, Morsel())
        M.set(key, real_value, coded_value)
        dict.__setitem__(self, key, M)
    # end __set

    def __setitem__(self, key, value):
        """Dictionary style assignment."""
        rval, cval = self.value_encode(value)
        self.__set(key, rval, cval)
    # end __setitem__

    def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"):
        """Return a string suitable for HTTP.

        One header line per morsel, in sorted key order, joined by *sep*.
        """
        result = []
        for K, V in sorted(self.items()):
            result.append(V.output(attrs, header))
        return sep.join(result)
    # end output

    __str__ = output

    def __repr__(self):
        L = []
        for K, V in sorted(self.items()):
            L.append('%s=%s' % (K, repr(V.value)))
        return '<%s: %s>' % (self.__class__.__name__, _spacejoin(L))

    def js_output(self, attrs=None):
        """Return a string suitable for JavaScript."""
        result = []
        for K, V in sorted(self.items()):
            result.append(V.js_output(attrs))
        return _nulljoin(result)
    # end js_output

    def load(self, rawdata):
        """Load cookies from a string (presumably HTTP_COOKIE) or
        from a dictionary.  Loading cookies from a dictionary 'd'
        is equivalent to calling:
            map(Cookie.__setitem__, d.keys(), d.values())
        """
        # isinstance rather than an exact type comparison, so that str
        # subclasses are parsed as strings instead of falling through to
        # the mapping branch and failing on .items().
        if isinstance(rawdata, str):
            self.__ParseString(rawdata)
        else:
            # self.update() wouldn't call our custom __setitem__
            for k, v in rawdata.items():
                self[k] = v
        return
    # end load()

    def __ParseString(self, str, patt=_CookiePattern):
        """Parse *str* and store every cookie (and its attributes) found."""
        i = 0            # Our starting point
        n = len(str)     # Length of string
        M = None         # current morsel

        while 0 <= i < n:
            # Start looking for a cookie
            match = patt.search(str, i)
            if not match:
                break          # No more cookies

            K, V = match.group("key"), match.group("val")
            i = match.end(0)

            # Parse the key, value in case it's metainfo
            if K[0] == "$":
                # We ignore attributes which pertain to the cookie
                # mechanism as a whole.  See RFC 2109.
                # (Does anyone care?)
                if M:
                    M[ K[1:] ] = V
            elif K.lower() in Morsel._reserved:
                # A reserved attribute applies to the most recently
                # parsed cookie; it is dropped if no cookie came first.
                if M:
                    M[ K ] = _unquote(V)
            else:
                rval, cval = self.value_decode(V)
                self.__set(K, rval, cval)
                M = self[K]
    # end __ParseString
# end BaseCookie class

class SimpleCookie(BaseCookie):
    """Cookie container whose values are plain strings.

    Values assigned dictionary-style are converted with the builtin
    str() and quoted for transmission; values received from HTTP are
    unquoted and kept as strings.
    """

    def value_decode(self, val):
        # Real value is the unquoted text; the coded value stays as read.
        decoded = _unquote(val)
        return decoded, val

    def value_encode(self, val):
        # Real value is the str() form; the coded value is its quoted form.
        text = str(val)
        return text, _quote(text)
# end SimpleCookie

#
###########################################################

591
def _test():
    """Run the doctests of the installed http.cookies module.

    Returns the result object produced by doctest.testmod().
    """
    import doctest
    import http.cookies
    return doctest.testmod(http.cookies)
594 595 596

# Run the doctest self-test when this file is executed as a script.
if __name__ == "__main__":
    _test()