source file: /Library/Python/2.3/site-packages/CherryPy-3.0.1-py2.3.egg/cherrypy/lib/encoding.py
file stats: 145 lines, 11 executed: 7.6% covered
   1. import struct
   2. import time
   3. 
   4. import cherrypy
   5. 
   6. 
   7. def decode(encoding=None, default_encoding='utf-8'):
   8.     """Decode cherrypy.request.params."""
   9.     if not encoding:
  10.         ct = cherrypy.request.headers.elements("Content-Type")
  11.         if ct:
  12.             ct = ct[0]
  13.             encoding = ct.params.get("charset", None)
  14.             if (not encoding) and ct.value.lower().startswith("text/"):
  15.                 # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1
  16.                 # When no explicit charset parameter is provided by the
  17.                 # sender, media subtypes of the "text" type are defined
  18.                 # to have a default charset value of "ISO-8859-1" when
  19.                 # received via HTTP.
  20.                 encoding = "ISO-8859-1"
  21. 
  22.         if not encoding:
  23.             encoding = default_encoding
  24. 
  25.     try:
  26.         decode_params(encoding)
  27.     except UnicodeDecodeError:
  28.         # IE and Firefox don't supply a charset when submitting form
  29.         # params with a CT of application/x-www-form-urlencoded.
  30.         # So after all our guessing, it could *still* be wrong.
  31.         # Start over with ISO-8859-1, since that seems to be preferred.
  32.         decode_params("ISO-8859-1")
  33. 
  34. def decode_params(encoding):
  35.     decoded_params = {}
  36.     for key, value in cherrypy.request.params.items():
  37.         if hasattr(value, 'file'):
  38.             # This is a file being uploaded: skip it
  39.             decoded_params[key] = value
  40.         elif isinstance(value, list):
  41.             # value is a list: decode each element
  42.             decoded_params[key] = [v.decode(encoding) for v in value]
  43.         elif isinstance(value, unicode):
  44.             pass
  45.         else:
  46.             # value is a regular string: decode it
  47.             decoded_params[key] = value.decode(encoding)
  48. 
  49.     # Decode all or nothing, so we can try again on error.
  50.     cherrypy.request.params = decoded_params
  51. 
  52. 
  53. # Encoding
  54. 
  55. def encode(encoding=None, errors='strict'):
  56.     # Guard against running twice
  57.     if getattr(cherrypy.request, "_encoding_attempted", False):
  58.         return
  59.     cherrypy.request._encoding_attempted = True
  60. 
  61.     ct = cherrypy.response.headers.elements("Content-Type")
  62.     if ct:
  63.         ct = ct[0]
  64.         if ct.value.lower().startswith("text/"):
  65.             # Set "charset=..." param on response Content-Type header
  66.             ct.params['charset'] = find_acceptable_charset(encoding, errors=errors)
  67.             cherrypy.response.headers["Content-Type"] = str(ct)
  68. 
  69. def encode_stream(encoding, errors='strict'):
  70.     """Encode a streaming response body.
  71. 
  72.     Use a generator wrapper, and just pray it works as the stream is
  73.     being written out.
  74.     """
  75.     def encoder(body):
  76.         for chunk in body:
  77.             if isinstance(chunk, unicode):
  78.                 chunk = chunk.encode(encoding, errors)
  79.             yield chunk
  80.     cherrypy.response.body = encoder(cherrypy.response.body)
  81.     return True
  82. 
  83. def encode_string(encoding, errors='strict'):
  84.     """Encode a buffered response body."""
  85.     try:
  86.         body = []
  87.         for chunk in cherrypy.response.body:
  88.             if isinstance(chunk, unicode):
  89.                 chunk = chunk.encode(encoding, errors)
  90.             body.append(chunk)
  91.         cherrypy.response.body = body
  92.     except (LookupError, UnicodeError):
  93.         return False
  94.     else:
  95.         return True
  96. 
  97. def find_acceptable_charset(encoding=None, default_encoding='utf-8', errors='strict'):
  98.     response = cherrypy.response
  99. 
 100.     if cherrypy.response.stream:
 101.         encoder = encode_stream
 102.     else:
 103.         response.collapse_body()
 104.         encoder = encode_string
 105.         if response.headers.has_key("Content-Length"):
 106.             # Delete Content-Length header so finalize() recalcs it.
 107.             # Encoded strings may be of different lengths from their
 108.             # unicode equivalents, and even from each other. For example:
 109.             # >>> t = u"\u7007\u3040"
 110.             # >>> len(t)
 111.             # 2
 112.             # >>> len(t.encode("UTF-8"))
 113.             # 6
 114.             # >>> len(t.encode("utf7"))
 115.             # 8
 116.             del response.headers["Content-Length"]
 117. 
 118.     # Parse the Accept-Charset request header, and try to provide one
 119.     # of the requested charsets (in order of user preference).
 120.     encs = cherrypy.request.headers.elements('Accept-Charset')
 121.     charsets = [enc.value.lower() for enc in encs]
 122.     attempted_charsets = []
 123. 
 124.     if encoding is not None:
 125.         # If specified, force this encoding to be used, or fail.
 126.         encoding = encoding.lower()
 127.         if (not charsets) or "*" in charsets or encoding in charsets:
 128.             if encoder(encoding, errors):
 129.                 return encoding
 130.     else:
 131.         if not encs:
 132.             # Any character-set is acceptable.
 133.             if encoder(default_encoding, errors):
 134.                 return default_encoding
 135.             else:
 136.                 raise cherrypy.HTTPError(500, failmsg % default_encoding)
 137.         else:
 138.             if "*" not in charsets:
 139.                 # If no "*" is present in an Accept-Charset field, then all
 140.                 # character sets not explicitly mentioned get a quality
 141.                 # value of 0, except for ISO-8859-1, which gets a quality
 142.                 # value of 1 if not explicitly mentioned.
 143.                 iso = 'iso-8859-1'
 144.                 if iso not in charsets:
 145.                     attempted_charsets.append(iso)
 146.                     if encoder(iso, errors):
 147.                         return iso
 148. 
 149.             for element in encs:
 150.                 if element.qvalue > 0:
 151.                     if element.value == "*":
 152.                         # Matches any charset. Try our default.
 153.                         if default_encoding not in attempted_charsets:
 154.                             attempted_charsets.append(default_encoding)
 155.                             if encoder(default_encoding, errors):
 156.                                 return default_encoding
 157.                     else:
 158.                         encoding = element.value
 159.                         if encoding not in attempted_charsets:
 160.                             attempted_charsets.append(encoding)
 161.                             if encoder(encoding, errors):
 162.                                 return encoding
 163. 
 164.     # No suitable encoding found.
 165.     ac = cherrypy.request.headers.get('Accept-Charset')
 166.     if ac is None:
 167.         msg = "Your client did not send an Accept-Charset header."
 168.     else:
 169.         msg = "Your client sent this Accept-Charset header: %s." % ac
 170.     msg += " We tried these charsets: %s." % ", ".join(attempted_charsets)
 171.     raise cherrypy.HTTPError(406, msg)
 172. 
 173. 
 174. # GZIP
 175. 
 176. def compress(body, compress_level):
 177.     """Compress 'body' at the given compress_level."""
 178.     import zlib
 179. 
 180.     yield '\037\213'      # magic header
 181.     yield '\010'         # compression method
 182.     yield '\0'
 183.     yield struct.pack("<L", long(time.time()))
 184.     yield '\002'
 185.     yield '\377'
 186. 
 187.     crc = zlib.crc32("")
 188.     size = 0
 189.     zobj = zlib.compressobj(compress_level,
 190.                             zlib.DEFLATED, -zlib.MAX_WBITS,
 191.                             zlib.DEF_MEM_LEVEL, 0)
 192.     for line in body:
 193.         size += len(line)
 194.         crc = zlib.crc32(line, crc)
 195.         yield zobj.compress(line)
 196.     yield zobj.flush()
 197.     yield struct.pack("<l", crc)
 198.     yield struct.pack("<L", size & 0xFFFFFFFFL)
 199. 
 200. def gzip(compress_level=9, mime_types=['text/html', 'text/plain']):
 201.     response = cherrypy.response
 202.     if not response.body:
 203.         # Response body is empty (might be a 304 for instance)
 204.         return
 205. 
 206.     acceptable = cherrypy.request.headers.elements('Accept-Encoding')
 207.     if not acceptable:
 208.         # If no Accept-Encoding field is present in a request,
 209.         # the server MAY assume that the client will accept any
 210.         # content coding. In this case, if "identity" is one of
 211.         # the available content-codings, then the server SHOULD use
 212.         # the "identity" content-coding, unless it has additional
 213.         # information that a different content-coding is meaningful
 214.         # to the client.
 215.         return
 216. 
 217.     ct = response.headers.get('Content-Type').split(';')[0]
 218.     for coding in acceptable:
 219.         if coding.value == 'identity' and coding.qvalue != 0:
 220.             return
 221.         if coding.value in ('gzip', 'x-gzip'):
 222.             if coding.qvalue == 0:
 223.                 return
 224.             if ct in mime_types:
 225.                 # Return a generator that compresses the page
 226.                 varies = response.headers.get("Vary", "")
 227.                 varies = [x.strip() for x in varies.split(",") if x.strip()]
 228.                 if "Accept-Encoding" not in varies:
 229.                     varies.append("Accept-Encoding")
 230.                 response.headers['Vary'] = ", ".join(varies)
 231. 
 232.                 response.headers['Content-Encoding'] = 'gzip'
 233.                 response.body = compress(response.body, compress_level)
 234.                 if response.headers.has_key("Content-Length"):
 235.                     # Delete Content-Length header so finalize() recalcs it.
 236.                     del response.headers["Content-Length"]
 237.             return
 238.     cherrypy.HTTPError(406, "identity, gzip").set_response()