"""CGI-savvy HTTP Server.

This module builds on SimpleHTTPServer by implementing GET and POST
requests to cgi-bin scripts.

If the os.fork() function is not present (e.g. on Windows), the script
is run through the subprocess module instead, with slightly altered
semantics; on platforms that additionally lack both os.popen2() and
os.popen3(), only Python scripts are supported.

In all cases, the implementation is intentionally naive -- all
requests are executed synchronously.

SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL
-- it may execute arbitrary Python code or external programs.

Note that status code 200 is sent prior to execution of a CGI script, so
scripts cannot send other status codes such as 302 (redirect).
"""

# Version string of this module.
__version__ = "0.4"

# Public API: only the request handler class is exported by "import *".
__all__ = ["CGIHTTPRequestHandler"]

import os
import select
import stat
import sys
import urllib
import BaseHTTPServer
import SimpleHTTPServer


class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):

    """Complete HTTP server with GET, HEAD and POST commands.

    GET and HEAD also support running CGI scripts.

    The POST command is *only* implemented for CGI scripts.

    """

    # Determine platform specifics
    have_fork = hasattr(os, 'fork')
    have_popen2 = hasattr(os, 'popen2')
    have_popen3 = hasattr(os, 'popen3')

    # Make rfile unbuffered -- we need to read one line and then pass
    # the rest to a subprocess, so we can't use buffered input.
    rbufsize = 0

    # Normalized URL directories whose entries are run as CGI scripts
    # (compared against the head returned by _url_collapse_path_split).
    cgi_directories = ['/cgi-bin', '/htbin']

    def do_POST(self):
        """Serve a POST request.

        This is only implemented for CGI scripts.  A path rejected by
        is_cgi() is answered with 400, a non-CGI path with 501.

        """
        try:
            is_cgi = self.is_cgi()
        except IndexError:
            # is_cgi() signals an invalid path (too many '..') by raising.
            self.send_error(400, "Invalid request path")
            return
        if is_cgi:
            self.run_cgi()
        else:
            self.send_error(501, "Can only POST to CGI scripts")

    def send_head(self):
        """Version of send_head that supports CGI scripts.

        Returns whatever run_cgi() returns for CGI paths, the base class
        result for other paths, and None after answering 400 when
        is_cgi() rejects the path.

        """
        try:
            is_cgi = self.is_cgi()
        except IndexError:
            # is_cgi() signals an invalid path (too many '..') by raising.
            self.send_error(400, "Invalid request path")
            return None
        if is_cgi:
            return self.run_cgi()
        return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self)

    def is_cgi(self):
        """Test whether self.path corresponds to a CGI script.

        Returns True and updates the cgi_info attribute to the tuple
        (dir, rest) if self.path requires running a CGI script.
        Returns False otherwise.

        If any exception is raised, the caller should assume that
        self.path was rejected as invalid and act accordingly.

        The default implementation tests whether the normalized url
        path begins with one of the strings in self.cgi_directories
        (and the next character is a '/' or the end of the string).
        """
        splitpath = _url_collapse_path_split(self.path)
        if splitpath[0] in self.cgi_directories:
            self.cgi_info = splitpath
            return True
        return False

    def is_executable(self, path):
        """Test whether argument path is an executable file."""
        return executable(path)

    def is_python(self, path):
        """Test whether argument path is a Python script."""
        head, tail = os.path.splitext(path)
        return tail.lower() in (".py", ".pyw")

    def _cgi_environ(self, scriptname, path_info, query):
        """Return the CGI variables describing the current request.

        scriptname is stored as SCRIPT_NAME, path_info (still URL-quoted)
        is unquoted into PATH_INFO / PATH_TRANSLATED and query becomes
        QUERY_STRING.  Variables that are optional for a request are
        always present in the result, empty when the request did not
        provide them, so they override inherited values of the same name.
        """
        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html
        # XXX Much of the following could be prepared ahead of time!
        env = {}
        env['SERVER_SOFTWARE'] = self.version_string()
        env['SERVER_NAME'] = self.server.server_name
        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
        env['SERVER_PROTOCOL'] = self.protocol_version
        env['SERVER_PORT'] = str(self.server.server_port)
        env['REQUEST_METHOD'] = self.command
        uqrest = urllib.unquote(path_info)
        env['PATH_INFO'] = uqrest
        env['PATH_TRANSLATED'] = self.translate_path(uqrest)
        env['SCRIPT_NAME'] = scriptname
        if query:
            env['QUERY_STRING'] = query
        host = self.address_string()
        if host != self.client_address[0]:
            env['REMOTE_HOST'] = host
        env['REMOTE_ADDR'] = self.client_address[0]
        authorization = self.headers.getheader("authorization")
        if authorization:
            authorization = authorization.split()
            if len(authorization) == 2:
                import base64
                import binascii
                env['AUTH_TYPE'] = authorization[0]
                if authorization[0].lower() == "basic":
                    try:
                        authorization = base64.decodestring(authorization[1])
                    except binascii.Error:
                        pass
                    else:
                        # Split on the first ':' only, so a password that
                        # itself contains ':' still yields the user name.
                        authorization = authorization.split(':', 1)
                        if len(authorization) == 2:
                            env['REMOTE_USER'] = authorization[0]
        # XXX REMOTE_IDENT
        if self.headers.typeheader is None:
            env['CONTENT_TYPE'] = self.headers.type
        else:
            env['CONTENT_TYPE'] = self.headers.typeheader
        length = self.headers.getheader('content-length')
        if length:
            env['CONTENT_LENGTH'] = length
        referer = self.headers.getheader('referer')
        if referer:
            env['HTTP_REFERER'] = referer
        accept = []
        for line in self.headers.getallmatchingheaders('accept'):
            if line[:1] in "\t\n\r ":
                # Continuation line of a folded Accept header.
                accept.append(line.strip())
            else:
                # Skip the 7 characters of the "Accept:" field name.
                accept = accept + line[7:].split(',')
        env['HTTP_ACCEPT'] = ','.join(accept)
        ua = self.headers.getheader('user-agent')
        if ua:
            env['HTTP_USER_AGENT'] = ua
        co = filter(None, self.headers.getheaders('cookie'))
        if co:
            env['HTTP_COOKIE'] = ', '.join(co)
        # XXX Other HTTP_* headers
        # Provide empty values so that variables inherited from the
        # server's own environment are overridden.
        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
            env.setdefault(k, "")
        return env

    def run_cgi(self):
        """Execute a CGI script.

        Expects is_cgi() to have stored (dir, rest) in self.cgi_info.
        Sends an error response (404 or 403) and returns if the script
        is missing, is not a plain file or cannot be run; otherwise
        sends status 200 and runs the script, through fork/execve when
        os.fork exists and through subprocess otherwise.
        """
        cgi_dir, rest = self.cgi_info
        # Walk the normalized path from is_cgi(), not the raw self.path:
        # the raw path may still contain '..' segments, which would let
        # the walk below settle on a directory outside cgi_directories.
        path = cgi_dir + '/' + rest

        # Descend into sub-directories of the CGI directory for as long
        # as the next path component names an existing directory.
        i = path.find('/', len(cgi_dir) + 1)
        while i >= 0:
            nextdir = path[:i]
            nextrest = path[i+1:]
            if not os.path.isdir(self.translate_path(nextdir)):
                break
            cgi_dir, rest = nextdir, nextrest
            i = path.find('/', len(cgi_dir) + 1)

        # find an explicit query string, if present.  The query starts at
        # the first '?'; any later '?' belongs to the query itself.
        i = rest.find('?')
        if i >= 0:
            rest, query = rest[:i], rest[i+1:]
        else:
            query = ''

        # dissect the part after the directory name into a script name &
        # a possible additional path, to be stored in PATH_INFO.
        i = rest.find('/')
        if i >= 0:
            script, rest = rest[:i], rest[i:]
        else:
            script, rest = rest, ''

        scriptname = cgi_dir + '/' + script
        scriptfile = self.translate_path(scriptname)
        if not os.path.exists(scriptfile):
            self.send_error(404, "No such CGI script (%r)" % scriptname)
            return
        if not os.path.isfile(scriptfile):
            self.send_error(403, "CGI script is not a plain file (%r)" %
                            scriptname)
            return
        ispy = self.is_python(scriptname)
        if not ispy:
            if not (self.have_fork or self.have_popen2 or self.have_popen3):
                self.send_error(403, "CGI script is not a Python script (%r)" %
                                scriptname)
                return
            if not self.is_executable(scriptfile):
                self.send_error(403, "CGI script is not executable (%r)" %
                                scriptname)
                return

        # Build the script's environment on a copy of os.environ so that
        # per-request values (e.g. REMOTE_USER) are never left behind in
        # the server process for later requests to inherit.
        env = os.environ.copy()
        env.update(self._cgi_environ(scriptname, rest, query))

        self.send_response(200, "Script output follows")

        decoded_query = query.replace('+', ' ')

        if self.have_fork:
            # Unix -- fork as we should
            args = [script]
            if '=' not in decoded_query:
                args.append(decoded_query)
            nobody = nobody_uid()
            self.wfile.flush() # Always flush before forking
            pid = os.fork()
            if pid != 0:
                # Parent
                pid, sts = os.waitpid(pid, 0)
                # throw away additional data [see bug #427345]
                while select.select([self.rfile], [], [], 0)[0]:
                    if not self.rfile.read(1):
                        break
                if sts:
                    self.log_error("CGI script exit status %#x", sts)
                return
            # Child
            try:
                try:
                    os.setuid(nobody)
                except os.error:
                    pass
                # Connect the script's stdin/stdout to the client socket.
                os.dup2(self.rfile.fileno(), 0)
                os.dup2(self.wfile.fileno(), 1)
                os.execve(scriptfile, args, env)
            except:
                # Deliberately bare: whatever goes wrong, the child must
                # report it and exit instead of returning into the
                # server's request loop.
                self.server.handle_error(self.request, self.client_address)
                os._exit(127)

        else:
            # Non Unix - use subprocess
            import subprocess
            cmdline = [scriptfile]
            if self.is_python(scriptfile):
                interp = sys.executable
                if interp.lower().endswith("w.exe"):
                    # On Windows, use python.exe, not pythonw.exe
                    interp = interp[:-5] + interp[-4:]
                cmdline = [interp, '-u'] + cmdline
            if '=' not in query:
                cmdline.append(query)

            self.log_message("command: %s", subprocess.list2cmdline(cmdline))
            length = self.headers.getheader('content-length')
            try:
                nbytes = int(length)
            except (TypeError, ValueError):
                # Missing or malformed Content-Length: no body is read.
                nbytes = 0
            p = subprocess.Popen(cmdline,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 env=env)
            if self.command.lower() == "post" and nbytes > 0:
                data = self.rfile.read(nbytes)
            else:
                data = None
            # throw away additional data [see bug #427345]
            while select.select([self.rfile._sock], [], [], 0)[0]:
                if not self.rfile._sock.recv(1):
                    break
            stdout, stderr = p.communicate(data)
            self.wfile.write(stdout)
            if stderr:
                self.log_error('%s', stderr)
            status = p.returncode
            if status:
                self.log_error("CGI script exit status %#x", status)
            else:
                self.log_message("CGI script exited OK")


# TODO(gregory.p.smith): Move this into an appropriate library.
def _url_collapse_path_split(path):
    """
    Given a URL path, remove extra '/'s and '.' path elements and collapse
    any '..' references.

    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.

    A query string (everything from the first '?' on) is not part of the
    path: it is left untouched and handed back at the end of the tail, so
    '/' and '..' inside the query never affect the result.

    Returns: A tuple of (head, tail) where tail is everything after the final /
    of the path component (followed by the query string, if any) and head is
    everything before it.  Head will always start with a '/' and, if it
    contains anything else, never have a trailing '/'.

    Raises: IndexError if too many '..' occur within the path.
    """
    # Similar to os.path.split(os.path.normpath(path)) but specific to URL
    # path semantics rather than local operating system semantics.
    query_start = path.find('?')
    if query_start >= 0:
        # Keep the '?' with the query so it can be re-appended verbatim.
        path, query = path[:query_start], path[query_start:]
    else:
        query = ''

    # str.split always returns at least one element, so pop() is safe.
    path_parts = path.split('/')
    tail_part = path_parts.pop()

    head_parts = []
    for part in path_parts:
        if part == '..':
            head_parts.pop()  # IndexError if there is nothing left to drop
        elif part and part != '.':
            # Blank parts (from repeated '/') and '.' are simply skipped.
            head_parts.append(part)

    if tail_part == '..':
        head_parts.pop()
        tail_part = ''
    elif tail_part == '.':
        tail_part = ''
    return ('/' + '/'.join(head_parts), tail_part + query)


# Cached result of nobody_uid(); None until a uid has been determined.
nobody = None

def nobody_uid():
    """Internal routine to get nobody's uid.

    Returns the uid of the 'nobody' account, or, when no such account
    exists, one more than the largest uid in the password database.
    The value is cached in the module-level name ``nobody``.  Returns -1
    (uncached) when the pwd module cannot be imported.
    """
    global nobody
    if nobody is not None:
        return nobody
    try:
        import pwd
    except ImportError:
        return -1
    try:
        nobody = pwd.getpwnam('nobody')[2]
    except KeyError:
        # No 'nobody' account: pick a uid above every existing one.
        nobody = 1 + max([entry[2] for entry in pwd.getpwall()])
    return nobody


def executable(path):
    """Test for executable file.

    Returns True if path can be stat()ed and any of its owner, group or
    other execute permission bits is set; returns False otherwise,
    including when os.stat() fails.
    """
    try:
        st = os.stat(path)
    except os.error:
        return False
    # Same bits as the octal mask 111, spelled with named constants.
    any_exec = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    return (st.st_mode & any_exec) != 0


def test(HandlerClass=CGIHTTPRequestHandler,
         ServerClass=BaseHTTPServer.HTTPServer):
    """Run SimpleHTTPServer.test() with the CGI-aware request handler.

    HandlerClass and ServerClass are passed through unchanged.
    """
    SimpleHTTPServer.test(HandlerClass, ServerClass)


if __name__ == '__main__':
    test()