"""HTTP server base class. | |

Note: the class in this module doesn't implement any HTTP request; see
SimpleHTTPServer for simple implementations of GET, HEAD and POST
(including CGI scripts).  It does, however, optionally implement HTTP/1.1
persistent connections, as of version 0.3.

Contents:

- BaseHTTPRequestHandler: HTTP request handler base class
- test: test function

XXX To do:

- log requests even later (to capture byte count)
- log user-agent header and other interesting goodies
- send error log to separate file
"""
# See also:
#
# HTTP Working Group                                        T. Berners-Lee
# INTERNET-DRAFT                                            R. T. Fielding
# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen
# Expires September 8, 1995                                  March 8, 1995
#
# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt
#
# and
#
# Network Working Group                                      R. Fielding
# Request for Comments: 2616                                       et al
# Obsoletes: 2068                                              June 1999
# Category: Standards Track
#
# URL: http://www.faqs.org/rfcs/rfc2616.html

# Log files
# ---------
#
# Here's a quote from the NCSA httpd docs about log file format.
#
# | The logfile format is as follows. Each line consists of:
# |
# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb
# |
# |        host: Either the DNS name or the IP number of the remote client
# |        rfc931: Any information returned by identd for this person,
# |                - otherwise.
# |        authuser: If user sent a userid for authentication, the user name,
# |                  - otherwise.
# |        DD: Day
# |        Mon: Month (calendar name)
# |        YYYY: Year
# |        hh: hour (24-hour format, the machine's timezone)
# |        mm: minutes
# |        ss: seconds
# |        request: The first line of the HTTP request as sent by the client.
# |        ddd: the status code returned by the server, - if not available.
# |        bbbb: the total number of bytes sent,
# |              *not including the HTTP/1.0 header*, - if not available
# |
# | You can determine the name of the file accessed through request.
#
# (Actually, the latter is only true if you know the server configuration
# at the time the request was made!)
# Version of this module; it is appended to "BaseHTTP/" to form the default
# BaseHTTPRequestHandler.server_version string.
__version__ = "0.3"

# Public names exported by a star-import of this module.
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
import sys | |
import time | |
import socket # For gethostbyaddr() | |
from warnings import filterwarnings, catch_warnings | |
with catch_warnings(): | |
if sys.py3kwarning: | |
filterwarnings("ignore", ".*mimetools has been removed", | |
DeprecationWarning) | |
import mimetools | |
import SocketServer | |
# Default error message template.
# BaseHTTPRequestHandler.send_error() interpolates it with a mapping that
# supplies the keys 'code', 'message' and 'explain'.
DEFAULT_ERROR_MESSAGE = """\
<head>
<title>Error response</title>
</head>
<body>
<h1>Error response</h1>
<p>Error code %(code)d.
<p>Message: %(message)s.
<p>Error code explanation: %(code)s = %(explain)s.
</body>
"""

# Value of the Content-Type header sent together with the error page.
DEFAULT_ERROR_CONTENT_TYPE = "text/html"
def _quote_html(html): | |
return html.replace("&", "&").replace("<", "<").replace(">", ">") | |
class HTTPServer(SocketServer.TCPServer):
    """TCP server that remembers the name and port it is bound to."""

    # Seems to make sense in testing environment
    allow_reuse_address = 1

    def server_bind(self):
        """Bind the socket, then record server_name and server_port.

        The base class performs the actual bind; afterwards the first two
        items of the socket's own address are read back, and the host part
        is passed through socket.getfqdn() before being stored.
        """
        SocketServer.TCPServer.server_bind(self)
        bound_host, bound_port = self.socket.getsockname()[:2]
        self.server_port = bound_port
        self.server_name = socket.getfqdn(bound_host)
class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler):

    """HTTP request handler base class.

    The following explanation of HTTP serves to guide you through the
    code as well as to expose any misunderstandings I may have about
    HTTP (so you don't need to read the code to figure out I'm wrong
    :-).

    HTTP (HyperText Transfer Protocol) is an extensible protocol on
    top of a reliable stream transport (e.g. TCP/IP).  The protocol
    recognizes three parts to a request:

    1. One line identifying the request type and path
    2. An optional set of RFC-822-style headers
    3. An optional data part

    The headers and data are separated by a blank line.

    The first line of the request has the form

    <command> <path> <version>

    where <command> is a (case-sensitive) keyword such as GET or POST,
    <path> is a string containing path information for the request,
    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".
    <path> is encoded using the URL encoding scheme (using %xx to signify
    the ASCII character with hex code xx).

    The specification specifies that lines are separated by CRLF but
    for compatibility with the widest range of clients recommends
    servers also handle LF.  Similarly, whitespace in the request line
    is treated sensibly (allowing multiple spaces between components
    and allowing trailing whitespace).

    Similarly, for output, lines ought to be separated by CRLF pairs
    but most clients grok LF characters just fine.

    If the first line of the request has the form

    <command> <path>

    (i.e. <version> is left out) then this is assumed to be an HTTP
    0.9 request; this form has no optional headers and data part and
    the reply consists of just the data.

    The reply form of the HTTP 1.x protocol again has three parts:

    1. One line giving the response code
    2. An optional set of RFC-822-style headers
    3. The data

    Again, the headers and data are separated by a blank line.

    The response code line has the form

    <version> <responsecode> <responsestring>

    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),
    <responsecode> is a 3-digit response code indicating success or
    failure of the request, and <responsestring> is an optional
    human-readable string explaining what the response code means.

    This server parses the request and the headers, and then calls a
    function specific to the request type (<command>).  Specifically,
    a request SPAM will be handled by a method do_SPAM().  If no
    such method exists the server sends an error response to the
    client.  If it exists, it is called with no arguments:

    do_SPAM()

    Note that the request name is case sensitive (i.e. SPAM and spam
    are different requests).

    The various request details are stored in instance variables:

    - client_address is the client IP address in the form (host,
    port);

    - command, path and version are the broken-down request line;

    - headers is an instance of mimetools.Message (or a derived
    class) containing the header information;

    - rfile is a file object open for reading positioned at the
    start of the optional input data part;

    - wfile is a file object open for writing.

    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!

    The first thing to be written must be the response line.  Then
    follow 0 or more header lines, then a blank line, and then the
    actual data (if any).  The meaning of the header lines depends on
    the command executed by the server; in most cases, when data is
    returned, there should be at least one header line of the form

    Content-type: <type>/<subtype>

    where <type> and <subtype> should be registered MIME types,
    e.g. "text/html" or "text/plain".

    """

    # The Python system version, truncated to its first component.
    sys_version = "Python/" + sys.version.split()[0]

    # The server software version.  You may want to override this.
    # The format is multiple whitespace-separated strings,
    # where each string is of the form name[/version].
    server_version = "BaseHTTP/" + __version__

    # The default request version.  This only affects responses up until
    # the point where the request line is parsed, so it mainly decides what
    # the client gets back when sending a malformed request line.
    # Most web servers default to HTTP 0.9, i.e. don't send a status line.
    default_request_version = "HTTP/0.9"

    def parse_request(self):
        """Parse a request (internal).

        The request should be stored in self.raw_requestline; the results
        are in self.command, self.path, self.request_version and
        self.headers.

        Return True for success, False for failure; on failure, an
        error is sent back (except for an empty request line, which is
        rejected silently).

        """
        self.command = None  # set in case of error on the first line
        self.request_version = version = self.default_request_version
        self.close_connection = 1
        requestline = self.raw_requestline
        # Strip exactly one line terminator, accepting CRLF as well as LF.
        if requestline[-2:] == '\r\n':
            requestline = requestline[:-2]
        elif requestline[-1:] == '\n':
            requestline = requestline[:-1]
        self.requestline = requestline
        words = requestline.split()
        if len(words) == 3:
            [command, path, version] = words
            if version[:5] != 'HTTP/':
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            try:
                base_version_number = version.split('/', 1)[1]
                version_number = base_version_number.split(".")
                # RFC 2145 section 3.1 says there can be only one "." and
                #   - major and minor numbers MUST be treated as
                #      separate integers;
                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in
                #      turn is lower than HTTP/12.3;
                #   - Leading zeros MUST be ignored by recipients.
                if len(version_number) != 2:
                    raise ValueError
                version_number = int(version_number[0]), int(version_number[1])
            except (ValueError, IndexError):
                self.send_error(400, "Bad request version (%r)" % version)
                return False
            # Persistent connections need both sides to speak HTTP/1.1.
            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":
                self.close_connection = 0
            if version_number >= (2, 0):
                self.send_error(505,
                                "Invalid HTTP Version (%s)" % base_version_number)
                return False
        elif len(words) == 2:
            # No version given: treated as an HTTP/0.9 request, GET only.
            [command, path] = words
            self.close_connection = 1
            if command != 'GET':
                self.send_error(400,
                                "Bad HTTP/0.9 request type (%r)" % command)
                return False
        elif not words:
            return False
        else:
            self.send_error(400, "Bad request syntax (%r)" % requestline)
            return False
        self.command, self.path, self.request_version = command, path, version

        # Examine the headers and look for a Connection directive
        self.headers = self.MessageClass(self.rfile, 0)

        conntype = self.headers.get('Connection', "")
        if conntype.lower() == 'close':
            self.close_connection = 1
        elif (conntype.lower() == 'keep-alive' and
              self.protocol_version >= "HTTP/1.1"):
            self.close_connection = 0
        return True

    def handle_one_request(self):
        """Handle a single HTTP request.

        You normally don't need to override this method; see the class
        __doc__ string for information on how to handle specific HTTP
        commands such as GET and POST.

        """
        try:
            # Read one byte more than the limit so an over-long request
            # line can be told apart from one that is exactly at the limit.
            self.raw_requestline = self.rfile.readline(65537)
            if len(self.raw_requestline) > 65536:
                # send_error() reads these attributes (directly or via the
                # logging and response helpers), so give them values first.
                self.requestline = ''
                self.request_version = ''
                self.command = ''
                self.send_error(414)
                return
            if not self.raw_requestline:
                # Nothing was read: stop handling requests on this
                # connection.
                self.close_connection = 1
                return
            if not self.parse_request():
                # An error code has been sent, just exit
                return
            mname = 'do_' + self.command
            if not hasattr(self, mname):
                self.send_error(501, "Unsupported method (%r)" % self.command)
                return
            method = getattr(self, mname)
            method()
            self.wfile.flush()  # actually send the response if not already done.
        except socket.timeout as e:
            # a read or a write timed out.  Discard this connection
            self.log_error("Request timed out: %r", e)
            self.close_connection = 1
            return

    def handle(self):
        """Handle multiple requests if necessary."""
        self.close_connection = 1

        self.handle_one_request()
        while not self.close_connection:
            self.handle_one_request()

    def send_error(self, code, message=None):
        """Send and log an error reply.

        Arguments are the error code, and a detailed message.
        The detailed message defaults to the short entry matching the
        response code.

        This sends an error response (so it must be called before any
        output has been generated), logs the error, and finally sends
        a piece of HTML explaining the error to the user.

        No HTML body and no Content-Type header are produced for codes
        below 200 or for 204, 205 and 304, since those responses do not
        carry a message body.  For a HEAD request the headers are sent
        but the body is omitted.

        """
        try:
            short_msg, long_msg = self.responses[code]
        except KeyError:
            short_msg, long_msg = '???', '???'
        if message is None:
            message = short_msg
        explain = long_msg
        self.log_error("code %d, message %s", code, message)

        # Message body is omitted for cases described in:
        #  - RFC 7230 section 3.3: 1xx, 204 (No Content), 304 (Not Modified)
        #  - RFC 7231 section 6.3.6: 205 (Reset Content)
        body_allowed = code >= 200 and code not in (204, 205, 304)
        content = None
        if body_allowed:
            # using _quote_html to prevent Cross Site Scripting attacks
            # (see bug #1100201)
            content = (self.error_message_format %
                       {'code': code, 'message': _quote_html(message),
                        'explain': explain})
        self.send_response(code, message)
        if body_allowed:
            self.send_header("Content-Type", self.error_content_type)
        self.send_header('Connection', 'close')
        self.end_headers()
        if body_allowed and self.command != 'HEAD':
            self.wfile.write(content)

    error_message_format = DEFAULT_ERROR_MESSAGE
    error_content_type = DEFAULT_ERROR_CONTENT_TYPE

    def send_response(self, code, message=None):
        """Send the response header and log the response code.

        Also send two standard headers with the server software
        version and the current date.

        If message is None, the short message registered for the code in
        self.responses is used, or an empty string for unknown codes.
        The status line is not written for HTTP/0.9 requests.

        """
        self.log_request(code)
        if message is None:
            if code in self.responses:
                message = self.responses[code][0]
            else:
                message = ''
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s %d %s\r\n" %
                             (self.protocol_version, code, message))
            # print (self.protocol_version, code, message)
        self.send_header('Server', self.version_string())
        self.send_header('Date', self.date_time_string())

    def send_header(self, keyword, value):
        """Send a MIME header.

        Nothing is written for HTTP/0.9 requests.  A 'Connection' header
        additionally updates self.close_connection to match its value.
        """
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("%s: %s\r\n" % (keyword, value))

        if keyword.lower() == 'connection':
            if value.lower() == 'close':
                self.close_connection = 1
            elif value.lower() == 'keep-alive':
                self.close_connection = 0

    def end_headers(self):
        """Send the blank line ending the MIME headers."""
        if self.request_version != 'HTTP/0.9':
            self.wfile.write("\r\n")

    def log_request(self, code='-', size='-'):
        """Log an accepted request.

        This is called by send_response().

        """
        self.log_message('"%s" %s %s',
                         self.requestline, str(code), str(size))

    def log_error(self, format, *args):
        """Log an error.

        This is called when a request cannot be fulfilled.  By
        default it passes the message on to log_message().

        Arguments are the same as for log_message().

        XXX This should go to the separate error log.

        """
        self.log_message(format, *args)

    def log_message(self, format, *args):
        """Log an arbitrary message.

        This is used by all other logging functions.  Override
        it if you have specific logging wishes.

        The first argument, FORMAT, is a format string for the
        message to be logged.  If the format string contains
        any % escapes requiring parameters, they should be
        specified as subsequent arguments (it's just like
        printf!).

        The client host and current date/time are prefixed to
        every message, which is written to sys.stderr.

        """
        sys.stderr.write("%s - - [%s] %s\n" %
                         (self.address_string(),
                          self.log_date_time_string(),
                          format%args))

    def version_string(self):
        """Return the server software version string."""
        return self.server_version + ' ' + self.sys_version

    def date_time_string(self, timestamp=None):
        """Return the current date and time formatted for a message header.

        If timestamp is given it is formatted instead of the current
        time.  The result is expressed in GMT.
        """
        if timestamp is None:
            timestamp = time.time()
        year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp)
        s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
                self.weekdayname[wd],
                day, self.monthname[month], year,
                hh, mm, ss)
        return s

    def log_date_time_string(self):
        """Return the current time formatted for logging (local time)."""
        now = time.time()
        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)
        s = "%02d/%3s/%04d %02d:%02d:%02d" % (
                day, self.monthname[month], year, hh, mm, ss)
        return s

    # Indexed by the weekday field of a time tuple (0 is Monday).
    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    # Indexed by the month field of a time tuple (1-12); slot 0 is unused.
    monthname = [None,
                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    def address_string(self):
        """Return the client address formatted for logging.

        This version passes the host part of self.client_address to
        socket.getfqdn() and returns the result; the port is ignored.

        """
        host, port = self.client_address[:2]
        return socket.getfqdn(host)

    # Essentially static class variables

    # The version of the HTTP protocol we support.
    # Set this to HTTP/1.1 to enable automatic keepalive
    protocol_version = "HTTP/1.0"

    # The Message-like class used to parse headers
    MessageClass = mimetools.Message

    # Table mapping response codes to messages; entries have the
    # form {code: (shortmessage, longmessage)}.
    # See RFC 2616.
    responses = {
        100: ('Continue', 'Request received, please continue'),
        101: ('Switching Protocols',
              'Switching to new protocol; obey Upgrade header'),

        200: ('OK', 'Request fulfilled, document follows'),
        201: ('Created', 'Document created, URL follows'),
        202: ('Accepted',
              'Request accepted, processing continues off-line'),
        203: ('Non-Authoritative Information', 'Request fulfilled from cache'),
        204: ('No Content', 'Request fulfilled, nothing follows'),
        205: ('Reset Content', 'Clear input form for further input.'),
        206: ('Partial Content', 'Partial content follows.'),

        300: ('Multiple Choices',
              'Object has several resources -- see URI list'),
        301: ('Moved Permanently', 'Object moved permanently -- see URI list'),
        302: ('Found', 'Object moved temporarily -- see URI list'),
        303: ('See Other', 'Object moved -- see Method and URL list'),
        304: ('Not Modified',
              'Document has not changed since given time'),
        305: ('Use Proxy',
              'You must use proxy specified in Location to access this '
              'resource.'),
        307: ('Temporary Redirect',
              'Object moved temporarily -- see URI list'),

        400: ('Bad Request',
              'Bad request syntax or unsupported method'),
        401: ('Unauthorized',
              'No permission -- see authorization schemes'),
        402: ('Payment Required',
              'No payment -- see charging schemes'),
        403: ('Forbidden',
              'Request forbidden -- authorization will not help'),
        404: ('Not Found', 'Nothing matches the given URI'),
        405: ('Method Not Allowed',
              'Specified method is invalid for this resource.'),
        406: ('Not Acceptable', 'URI not available in preferred format.'),
        407: ('Proxy Authentication Required', 'You must authenticate with '
              'this proxy before proceeding.'),
        408: ('Request Timeout', 'Request timed out; try again later.'),
        409: ('Conflict', 'Request conflict.'),
        410: ('Gone',
              'URI no longer exists and has been permanently removed.'),
        411: ('Length Required', 'Client must specify Content-Length.'),
        412: ('Precondition Failed', 'Precondition in headers is false.'),
        413: ('Request Entity Too Large', 'Entity is too large.'),
        414: ('Request-URI Too Long', 'URI is too long.'),
        415: ('Unsupported Media Type', 'Entity body in unsupported format.'),
        416: ('Requested Range Not Satisfiable',
              'Cannot satisfy request range.'),
        417: ('Expectation Failed',
              'Expect condition could not be satisfied.'),

        500: ('Internal Server Error', 'Server got itself in trouble'),
        501: ('Not Implemented',
              'Server does not support this operation'),
        502: ('Bad Gateway', 'Invalid responses from another server/proxy.'),
        503: ('Service Unavailable',
              'The server cannot process the request due to a high load'),
        504: ('Gateway Timeout',
              'The gateway server did not receive a timely response'),
        505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
        }
def test(HandlerClass = BaseHTTPRequestHandler,
         ServerClass = HTTPServer, protocol="HTTP/1.0"):
    """Test the HTTP request handler class.

    This runs an HTTP server on port 8000 (or the first command line
    argument).

    HandlerClass.protocol_version is set to *protocol* before the server
    is created; the call then serves requests until interrupted.

    """
    cli_args = sys.argv[1:]
    port = int(cli_args[0]) if cli_args else 8000

    HandlerClass.protocol_version = protocol
    # An empty host string is passed as the bind address.
    httpd = ServerClass(('', port), HandlerClass)

    bound = httpd.socket.getsockname()
    print("Serving HTTP on %s port %s ..." % (bound[0], bound[1]))
    httpd.serve_forever()
# Run the demo server when this file is executed as a script.
if __name__ == '__main__':
    test()