source file: /Library/Python/2.3/site-packages/CherryPy-3.0.1-py2.3.egg/cherrypy/lib/http.py
file stats: 270 lines, 126 executed: 46.7% covered
   1. """HTTP library functions."""
   2. 
   3. # This module contains functions for building an HTTP application
   4. # framework: any one, not just one whose name starts with "Ch". ;) If you
   5. # reference any modules from some popular framework inside *this* module,
   6. # FuManChu will personally hang you up by your thumbs and submit you
   7. # to a public caning.
   8. 
   9. from BaseHTTPServer import BaseHTTPRequestHandler
# Map of HTTP status code -> (reason phrase, long message). It starts as a
# copy of the standard library's table so the overrides below do not
# modify BaseHTTPRequestHandler.responses itself.
response_codes = BaseHTTPRequestHandler.responses.copy()

# From http://www.cherrypy.org/ticket/361
response_codes[500] = ('Internal Server Error',
                      'The server encountered an unexpected condition '
                      'which prevented it from fulfilling the request.')
response_codes[503] = ('Service Unavailable',
                      'The server is currently unable to handle the '
                      'request due to a temporary overloading or '
                      'maintenance of the server.')
  20. 
  21. 
  22. import cgi
  23. from email.Header import Header, decode_header
  24. import re
  25. import rfc822
  26. HTTPDate = rfc822.formatdate
  27. import time
  28. 
  29. 
  30. def urljoin(*atoms):
  31.     url = "/".join(atoms)
  32.     while "//" in url:
  33.         url = url.replace("//", "/")
  34.     return url
  35. 
  36. def protocol_from_http(protocol_str):
  37.     """Return a protocol tuple from the given 'HTTP/x.y' string."""
  38.     return int(protocol_str[5]), int(protocol_str[7])
  39. 
  40. def get_ranges(headervalue, content_length):
  41.     """Return a list of (start, stop) indices from a Range header, or None.
  42. 
  43.     Each (start, stop) tuple will be composed of two ints, which are suitable
  44.     for use in a slicing operation. That is, the header "Range: bytes=3-6",
  45.     if applied against a Python string, is requesting resource[3:7]. This
  46.     function will return the list [(3, 7)].
  47. 
  48.     If this function return an empty list, you should return HTTP 416.
  49.     """
  50. 
  51.     if not headervalue:
  52.         return None
  53. 
  54.     result = []
  55.     bytesunit, byteranges = headervalue.split("=", 1)
  56.     for brange in byteranges.split(","):
  57.         start, stop = [x.strip() for x in brange.split("-", 1)]
  58.         if start:
  59.             if not stop:
  60.                 stop = content_length - 1
  61.             start, stop = map(int, (start, stop))
  62.             if start >= content_length:
  63.                 # From rfc 2616 sec 14.16:
  64.                 # "If the server receives a request (other than one
  65.                 # including an If-Range request-header field) with an
  66.                 # unsatisfiable Range request-header field (that is,
  67.                 # all of whose byte-range-spec values have a first-byte-pos
  68.                 # value greater than the current length of the selected
  69.                 # resource), it SHOULD return a response code of 416
  70.                 # (Requested range not satisfiable)."
  71.                 continue
  72.             if stop < start:
  73.                 # From rfc 2616 sec 14.16:
  74.                 # "If the server ignores a byte-range-spec because it
  75.                 # is syntactically invalid, the server SHOULD treat
  76.                 # the request as if the invalid Range header field
  77.                 # did not exist. (Normally, this means return a 200
  78.                 # response containing the full entity)."
  79.                 return None
  80.             result.append((start, stop + 1))
  81.         else:
  82.             if not stop:
  83.                 # See rfc quote above.
  84.                 return None
  85.             # Negative subscript (last N bytes)
  86.             result.append((content_length - int(stop), content_length))
  87. 
  88.     return result
  89. 
  90. 
  91. class HeaderElement(object):
  92.     """An element (with parameters) from an HTTP header's element list."""
  93. 
  94.     def __init__(self, value, params=None):
  95.         self.value = value
  96.         if params is None:
  97.             params = {}
  98.         self.params = params
  99. 
 100.     def __unicode__(self):
 101.         p = [";%s=%s" % (k, v) for k, v in self.params.iteritems()]
 102.         return u"%s%s" % (self.value, "".join(p))
 103. 
 104.     def __str__(self):
 105.         return str(self.__unicode__())
 106. 
 107.     def parse(elementstr):
 108.         """Transform 'token;key=val' to ('token', {'key': 'val'})."""
 109.         # Split the element into a value and parameters. The 'value' may
 110.         # be of the form, "token=token", but we don't split that here.
 111.         atoms = [x.strip() for x in elementstr.split(";")]
 112.         initial_value = atoms.pop(0).strip()
 113.         params = {}
 114.         for atom in atoms:
 115.             atom = [x.strip() for x in atom.split("=", 1) if x.strip()]
 116.             key = atom.pop(0)
 117.             if atom:
 118.                 val = atom[0]
 119.             else:
 120.                 val = ""
 121.             params[key] = val
 122.         return initial_value, params
 123.     parse = staticmethod(parse)
 124. 
 125.     def from_str(cls, elementstr):
 126.         """Construct an instance from a string of the form 'token;key=val'."""
 127.         ival, params = cls.parse(elementstr)
 128.         return cls(ival, params)
 129.     from_str = classmethod(from_str)
 130. 
 131. 
 132. q_separator = re.compile(r'; *q *=')
 133. 
 134. class AcceptElement(HeaderElement):
 135.     """An element (with parameters) from an Accept-* header's element list."""
 136. 
 137.     def from_str(cls, elementstr):
 138.         qvalue = None
 139.         # The first "q" parameter (if any) separates the initial
 140.         # parameter(s) (if any) from the accept-params.
 141.         atoms = q_separator.split(elementstr, 1)
 142.         initial_value = atoms.pop(0).strip()
 143.         if atoms:
 144.             # The qvalue for an Accept header can have extensions. The other
 145.             # headers cannot, but it's easier to parse them as if they did.
 146.             qvalue = HeaderElement.from_str(atoms[0].strip())
 147. 
 148.         ival, params = cls.parse(initial_value)
 149.         if qvalue is not None:
 150.             params["q"] = qvalue
 151.         return cls(ival, params)
 152.     from_str = classmethod(from_str)
 153. 
 154.     def qvalue(self):
 155.         val = self.params.get("q", "1")
 156.         if isinstance(val, HeaderElement):
 157.             val = val.value
 158.         return float(val)
 159.     qvalue = property(qvalue, doc="The qvalue, or priority, of this value.")
 160. 
 161.     def __cmp__(self, other):
 162.         # If you sort a list of AcceptElement objects, they will be listed
 163.         # in priority order; the most preferred value will be first.
 164.         diff = cmp(other.qvalue, self.qvalue)
 165.         if diff == 0:
 166.             diff = cmp(str(other), str(self))
 167.         return diff
 168. 
 169. 
 170. def header_elements(fieldname, fieldvalue):
 171.     """Return a HeaderElement list from a comma-separated header str."""
 172. 
 173.     if not fieldvalue:
 174.         return None
 175.     headername = fieldname.lower()
 176. 
 177.     result = []
 178.     for element in fieldvalue.split(","):
 179.         if headername.startswith("accept") or headername == 'te':
 180.             hv = AcceptElement.from_str(element)
 181.         else:
 182.             hv = HeaderElement.from_str(element)
 183.         result.append(hv)
 184. 
 185.     result.sort()
 186.     return result
 187. 
 188. def decode_TEXT(value):
 189.     """Decode RFC-2047 TEXT (e.g. "=?utf-8?q?f=C3=BCr?=" -> u"f\xfcr")."""
 190.     atoms = decode_header(value)
 191.     decodedvalue = ""
 192.     for atom, charset in atoms:
 193.         if charset is not None:
 194.             atom = atom.decode(charset)
 195.         decodedvalue += atom
 196.     return decodedvalue
 197. 
 198. def valid_status(status):
 199.     """Return legal HTTP status Code, Reason-phrase and Message.
 200. 
 201.     The status arg must be an int, or a str that begins with an int.
 202. 
 203.     If status is an int, or a str and  no reason-phrase is supplied,
 204.     a default reason-phrase will be provided.
 205.     """
 206. 
 207.     if not status:
 208.         status = 200
 209. 
 210.     status = str(status)
 211.     parts = status.split(" ", 1)
 212.     if len(parts) == 1:
 213.         # No reason supplied.
 214.         code, = parts
 215.         reason = None
 216.     else:
 217.         code, reason = parts
 218.         reason = reason.strip()
 219. 
 220.     try:
 221.         code = int(code)
 222.     except ValueError:
 223.         raise ValueError("Illegal response status from server "
 224.                          "(%s is non-numeric)." % repr(code))
 225. 
 226.     if code < 100 or code > 599:
 227.         raise ValueError("Illegal response status from server "
 228.                          "(%s is out of range)." % repr(code))
 229. 
 230.     if code not in response_codes:
 231.         # code is unknown but not illegal
 232.         default_reason, message = "", ""
 233.     else:
 234.         default_reason, message = response_codes[code]
 235. 
 236.     if reason is None:
 237.         reason = default_reason
 238. 
 239.     return code, reason, message
 240. 
 241. 
 242. image_map_pattern = re.compile(r"[0-9]+,[0-9]+")
 243. 
 244. def parse_query_string(query_string, keep_blank_values=True):
 245.     """Build a params dictionary from a query_string."""
 246.     if image_map_pattern.match(query_string):
 247.         # Server-side image map. Map the coords to 'x' and 'y'
 248.         # (like CGI::Request does).
 249.         pm = query_string.split(",")
 250.         pm = {'x': int(pm[0]), 'y': int(pm[1])}
 251.     else:
 252.         pm = cgi.parse_qs(query_string, keep_blank_values)
 253.         for key, val in pm.items():
 254.             if len(val) == 1:
 255.                 pm[key] = val[0]
 256.     return pm
 257. 
 258. def params_from_CGI_form(form):
 259.     params = {}
 260.     for key in form.keys():
 261.         value_list = form[key]
 262.         if isinstance(value_list, list):
 263.             params[key] = []
 264.             for item in value_list:
 265.                 if item.filename is not None:
 266.                     value = item # It's a file upload
 267.                 else:
 268.                     value = item.value # It's a regular field
 269.                 params[key].append(value)
 270.         else:
 271.             if value_list.filename is not None:
 272.                 value = value_list # It's a file upload
 273.             else:
 274.                 value = value_list.value # It's a regular field
 275.             params[key] = value
 276.     return params
 277. 
 278. 
 279. class CaseInsensitiveDict(dict):
 280.     """A case-insensitive dict subclass.
 281. 
 282.     Each key is changed on entry to str(key).title().
 283.     """
 284. 
 285.     def __getitem__(self, key):
 286.         return dict.__getitem__(self, str(key).title())
 287. 
 288.     def __setitem__(self, key, value):
 289.         dict.__setitem__(self, str(key).title(), value)
 290. 
 291.     def __delitem__(self, key):
 292.         dict.__delitem__(self, str(key).title())
 293. 
 294.     def __contains__(self, key):
 295.         return dict.__contains__(self, str(key).title())
 296. 
 297.     def get(self, key, default=None):
 298.         return dict.get(self, str(key).title(), default)
 299. 
 300.     def has_key(self, key):
 301.         return dict.has_key(self, str(key).title())
 302. 
 303.     def update(self, E):
 304.         for k in E.keys():
 305.             self[str(k).title()] = E[k]
 306. 
 307.     def fromkeys(cls, seq, value=None):
 308.         newdict = cls()
 309.         for k in seq:
 310.             newdict[str(k).title()] = value
 311.         return newdict
 312.     fromkeys = classmethod(fromkeys)
 313. 
 314.     def setdefault(self, key, x=None):
 315.         key = str(key).title()
 316.         try:
 317.             return self[key]
 318.         except KeyError:
 319.             self[key] = x
 320.             return x
 321. 
 322.     def pop(self, key, default):
 323.         return dict.pop(self, str(key).title(), default)
 324. 
 325. 
 326. class HeaderMap(CaseInsensitiveDict):
 327.     """A dict subclass for HTTP request and response headers.
 328. 
 329.     Each key is changed on entry to str(key).title(). This allows headers
 330.     to be case-insensitive and avoid duplicates.
 331. 
 332.     Values are header values (decoded according to RFC 2047 if necessary).
 333.     """
 334. 
 335.     def elements(self, key):
 336.         """Return a list of HeaderElements for the given header (or None)."""
 337.         key = str(key).title()
 338.         h = self.get(key)
 339.         if h is None:
 340.             return []
 341.         return header_elements(key, h)
 342. 
 343.     def output(self, protocol=(1, 1)):
 344.         """Transform self into a list of (name, value) tuples."""
 345.         header_list = []
 346.         for key, v in self.iteritems():
 347.             if isinstance(v, unicode):
 348.                 # HTTP/1.0 says, "Words of *TEXT may contain octets
 349.                 # from character sets other than US-ASCII." and
 350.                 # "Recipients of header field TEXT containing octets
 351.                 # outside the US-ASCII character set may assume that
 352.                 # they represent ISO-8859-1 characters."
 353.                 try:
 354.                     v = v.encode("iso-8859-1")
 355.                 except UnicodeEncodeError:
 356.                     if protocol >= (1, 1):
 357.                         # Encode RFC-2047 TEXT
 358.                         # (e.g. u"\u8200" -> "=?utf-8?b?6IiA?=").
 359.                         v = Header(v, 'utf-8').encode()
 360.                     else:
 361.                         raise
 362.             else:
 363.                 # This coercion should not take any time at all
 364.                 # if value is already of type "str".
 365.                 v = str(v)
 366.             header_list.append((key, v))
 367.         return header_list
 368. 
 369. 
 370. class MaxSizeExceeded(Exception):
 371.     pass
 372. 
 373. class SizeCheckWrapper(object):
 374.     """Wraps a file-like object, raising MaxSizeExceeded if too large."""
 375. 
 376.     def __init__(self, rfile, maxlen):
 377.         self.rfile = rfile
 378.         self.maxlen = maxlen
 379.         self.bytes_read = 0
 380. 
 381.     def _check_length(self):
 382.         if self.maxlen and self.bytes_read > self.maxlen:
 383.             raise MaxSizeExceeded()
 384. 
 385.     def read(self, size = None):
 386.         data = self.rfile.read(size)
 387.         self.bytes_read += len(data)
 388.         self._check_length()
 389.         return data
 390. 
 391.     def readline(self, size = None):
 392.         if size is not None:
 393.             data = self.rfile.readline(size)
 394.             self.bytes_read += len(data)
 395.             self._check_length()
 396.             return data
 397. 
 398.         # User didn't specify a size ...
 399.         # We read the line in chunks to make sure it's not a 100MB line !
 400.         res = []
 401.         while True:
 402.             data = self.rfile.readline(256)
 403.             self.bytes_read += len(data)
 404.             self._check_length()
 405.             res.append(data)
 406.             # See http://www.cherrypy.org/ticket/421
 407.             if len(data) < 256 or data[-1:] == "\n":
 408.                 return ''.join(res)
 409. 
 410.     def readlines(self, sizehint = 0):
 411.         # Shamelessly stolen from StringIO
 412.         total = 0
 413.         lines = []
 414.         line = self.readline()
 415.         while line:
 416.             lines.append(line)
 417.             total += len(line)
 418.             if 0 < sizehint <= total:
 419.                 break
 420.             line = self.readline()
 421.         return lines
 422. 
 423.     def close(self):
 424.         self.rfile.close()
 425. 
 426.     def __iter__(self):
 427.         return self
 428. 
 429.     def next(self):
 430.         data = self.rfile.next()
 431.         self.bytes_read += len(data)
 432.         self._check_length()
 433.         return data
 434. 
 435. 
 436. class Host(object):
 437.     """An internet address.
 438. 
 439.     name should be the client's host name. If not available (because no DNS
 440.         lookup is performed), the IP address should be used instead.
 441.     """
 442. 
 443.     ip = "0.0.0.0"
 444.     port = 80
 445.     name = "unknown.tld"
 446. 
 447.     def __init__(self, ip, port, name=None):
 448.         self.ip = ip
 449.         self.port = port
 450.         if name is None:
 451.             name = ip
 452.         self.name = name
 453. 
 454.     def __repr__(self):
 455.         return "http.Host(%r, %r, %r)" % (self.ip, self.port, self.name)