source file: /Library/Python/2.3/site-packages/CherryPy-3.0.1-py2.3.egg/cherrypy/lib/encoding.py
file stats: 145 lines, 11 executed: 7.6% covered
import struct
import time

import cherrypy


def decode(encoding=None, default_encoding='utf-8'):
    """Decode cherrypy.request.params.

    encoding: the charset to decode with. If false, it is looked up in
        this order: the "charset" parameter of the request Content-Type
        header; "ISO-8859-1" if that header names a "text/" media type
        without a charset; finally default_encoding.
    default_encoding: the charset used when nothing else supplies one.

    If decoding with the chosen charset raises UnicodeDecodeError, the
    params are decoded again from scratch as ISO-8859-1.
    """
    if not encoding:
        ct = cherrypy.request.headers.elements("Content-Type")
        if ct:
            ct = ct[0]
            encoding = ct.params.get("charset", None)
            if (not encoding) and ct.value.lower().startswith("text/"):
                # http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1
                # When no explicit charset parameter is provided by the
                # sender, media subtypes of the "text" type are defined
                # to have a default charset value of "ISO-8859-1" when
                # received via HTTP.
                encoding = "ISO-8859-1"

        if not encoding:
            encoding = default_encoding

    try:
        decode_params(encoding)
    except UnicodeDecodeError:
        # IE and Firefox don't supply a charset when submitting form
        # params with a CT of application/x-www-form-urlencoded.
        # So after all our guessing, it could *still* be wrong.
        # Start over with ISO-8859-1, since that seems to be preferred.
        decode_params("ISO-8859-1")


def decode_params(encoding):
    """Replace cherrypy.request.params with a copy decoded via 'encoding'.

    Uploaded files (values with a 'file' attribute) and values that are
    already unicode are copied unchanged; lists are decoded element by
    element; any other value is decoded directly.

    The new dict is only assigned once every value has been processed,
    so an exception raised while decoding leaves the params untouched.
    """
    decoded_params = {}
    for key, value in cherrypy.request.params.items():
        if hasattr(value, 'file') or isinstance(value, unicode):
            # An uploaded file, or a value that is already unicode:
            # keep it as-is. (It must still be copied across, otherwise
            # the parameter would vanish from the request.)
            decoded_params[key] = value
        elif isinstance(value, list):
            # value is a list: decode each element
            decoded_params[key] = [v.decode(encoding) for v in value]
        else:
            # value is a regular string: decode it
            decoded_params[key] = value.decode(encoding)

    # Decode all or nothing, so we can try again on error.
    cherrypy.request.params = decoded_params


# Encoding

def encode(encoding=None, errors='strict'):
    """Encode the response body and set the Content-Type charset param.

    Only acts when the response Content-Type is a "text/" media type.
    The charset is chosen (and the body encoded) by
    find_acceptable_charset(); 'encoding' and 'errors' are passed on
    to it.

    Runs at most once per request: a flag is stored on cherrypy.request
    the first time through and later calls return immediately.
    """
    # Guard against running twice
    if getattr(cherrypy.request, "_encoding_attempted", False):
        return
    cherrypy.request._encoding_attempted = True

    ct = cherrypy.response.headers.elements("Content-Type")
    if ct:
        ct = ct[0]
        if ct.value.lower().startswith("text/"):
            # Set "charset=..." param on response Content-Type header
            ct.params['charset'] = find_acceptable_charset(encoding,
                                                           errors=errors)
            cherrypy.response.headers["Content-Type"] = str(ct)


def encode_stream(encoding, errors='strict'):
    """Encode a streaming response body.

    Use a generator wrapper, and just pray it works as the stream is
    being written out.

    Always returns True: nothing is encoded until the wrapped body is
    iterated, so encoding errors cannot be detected here.
    """
    def encoder(body):
        for chunk in body:
            if isinstance(chunk, unicode):
                chunk = chunk.encode(encoding, errors)
            yield chunk
    cherrypy.response.body = encoder(cherrypy.response.body)
    return True


def encode_string(encoding, errors='strict'):
    """Encode a buffered response body.

    Returns True if every unicode chunk was encoded and the response
    body was replaced, or False (leaving the body as it was) if the
    charset is unknown or a chunk cannot be encoded with it.
    """
    try:
        body = []
        for chunk in cherrypy.response.body:
            if isinstance(chunk, unicode):
                chunk = chunk.encode(encoding, errors)
            body.append(chunk)
        cherrypy.response.body = body
    except (LookupError, UnicodeError):
        return False
    else:
        return True


def find_acceptable_charset(encoding=None, default_encoding='utf-8',
                            errors='strict'):
    """Encode the response body with an acceptable charset; return its name.

    encoding: if not None, the only charset that will be tried (and only
        when the request's Accept-Charset header is absent, contains
        "*", or lists it).
    default_encoding: the charset tried when the client sent no
        Accept-Charset header, or for a "*" entry in that header.
    errors: the error-handling scheme passed to unicode.encode().

    Raises cherrypy.HTTPError 500 if there is no Accept-Charset header
    and the body cannot be encoded with default_encoding, and
    cherrypy.HTTPError 406 if no other attempted charset worked.
    """
    failmsg = "The response could not be encoded with %s"
    response = cherrypy.response

    if cherrypy.response.stream:
        encoder = encode_stream
    else:
        response.collapse_body()
        encoder = encode_string
        if response.headers.has_key("Content-Length"):
            # Delete Content-Length header so finalize() recalcs it.
            # Encoded strings may be of different lengths from their
            # unicode equivalents, and even from each other. For example:
            # >>> t = u"\u7007\u3040"
            # >>> len(t)
            # 2
            # >>> len(t.encode("UTF-8"))
            # 6
            # >>> len(t.encode("utf7"))
            # 8
            del response.headers["Content-Length"]

    # Parse the Accept-Charset request header, and try to provide one
    # of the requested charsets (in order of user preference).
    encs = cherrypy.request.headers.elements('Accept-Charset')
    charsets = [enc.value.lower() for enc in encs]
    attempted_charsets = []

    if encoding is not None:
        # If specified, force this encoding to be used, or fail.
        encoding = encoding.lower()
        if (not charsets) or "*" in charsets or encoding in charsets:
            if encoder(encoding, errors):
                return encoding
    else:
        if not encs:
            # Any character-set is acceptable.
            if encoder(default_encoding, errors):
                return default_encoding
            else:
                raise cherrypy.HTTPError(500, failmsg % default_encoding)
        else:
            if "*" not in charsets:
                # If no "*" is present in an Accept-Charset field, then all
                # character sets not explicitly mentioned get a quality
                # value of 0, except for ISO-8859-1, which gets a quality
                # value of 1 if not explicitly mentioned.
                iso = 'iso-8859-1'
                if iso not in charsets:
                    attempted_charsets.append(iso)
                    if encoder(iso, errors):
                        return iso

            for element in encs:
                if element.qvalue > 0:
                    if element.value == "*":
                        # Matches any charset. Try our default.
                        if default_encoding not in attempted_charsets:
                            attempted_charsets.append(default_encoding)
                            if encoder(default_encoding, errors):
                                return default_encoding
                    else:
                        encoding = element.value
                        if encoding not in attempted_charsets:
                            attempted_charsets.append(encoding)
                            if encoder(encoding, errors):
                                return encoding

    # No suitable encoding found.
    ac = cherrypy.request.headers.get('Accept-Charset')
    if ac is None:
        msg = "Your client did not send an Accept-Charset header."
    else:
        msg = "Your client sent this Accept-Charset header: %s." % ac
    msg += " We tried these charsets: %s." % ", ".join(attempted_charsets)
    raise cherrypy.HTTPError(406, msg)


# GZIP

def compress(body, compress_level):
    """Compress 'body' at the given compress_level.

    'body' is an iterable of strings. This is a generator: it yields a
    gzip header, then the raw deflate output for each chunk of 'body',
    then a trailer holding the CRC-32 and the size of the uncompressed
    data.
    """
    import zlib

    yield '\037\213'      # magic header
    yield '\010'          # compression method
    yield '\0'            # flags byte (RFC 1952 FLG): none set
    yield struct.pack("<L", long(time.time()))    # timestamp
    yield '\002'          # extra-flags byte (RFC 1952 XFL)
    yield '\377'          # operating-system byte (RFC 1952 OS)

    crc = zlib.crc32("")
    size = 0
    # Negative wbits: emit a raw deflate stream with no zlib header or
    # checksum, since the gzip framing is written by hand here.
    zobj = zlib.compressobj(compress_level,
                            zlib.DEFLATED, -zlib.MAX_WBITS,
                            zlib.DEF_MEM_LEVEL, 0)
    for line in body:
        size += len(line)
        crc = zlib.crc32(line, crc)
        yield zobj.compress(line)
    yield zobj.flush()
    yield struct.pack("<l", crc)
    # Keep the low 32 bits of the size. 4294967295 == 0xFFFFFFFF; the
    # decimal spelling avoids the long-integer "L" suffix.
    yield struct.pack("<L", size & 4294967295)


def gzip(compress_level=9, mime_types=['text/html', 'text/plain']):
    """Gzip the response body if the client's Accept-Encoding allows it.

    compress_level: the level passed to compress().
    mime_types: the response media types (the Content-Type value before
        any ";") that are eligible for compression.

    Does nothing if the response body is empty or the request has no
    Accept-Encoding header. Otherwise the header's codings are examined
    in order: an acceptable "identity" leaves the response alone; a
    "gzip"/"x-gzip" entry either compresses the body (when acceptable
    and the media type is eligible) or leaves it alone. If no entry
    settles the matter, the response is set to 406.
    """
    response = cherrypy.response
    if not response.body:
        # Response body is empty (might be a 304 for instance)
        return

    acceptable = cherrypy.request.headers.elements('Accept-Encoding')
    if not acceptable:
        # If no Accept-Encoding field is present in a request,
        # the server MAY assume that the client will accept any
        # content coding. In this case, if "identity" is one of
        # the available content-codings, then the server SHOULD use
        # the "identity" content-coding, unless it has additional
        # information that a different content-coding is meaningful
        # to the client.
        return

    # A response without a Content-Type yields '' here, which matches
    # no entry in mime_types and so is never compressed.
    ct = response.headers.get('Content-Type', '').split(';')[0]
    for coding in acceptable:
        if coding.value == 'identity' and coding.qvalue != 0:
            return
        if coding.value in ('gzip', 'x-gzip'):
            if coding.qvalue == 0:
                return
            if ct in mime_types:
                # Return a generator that compresses the page
                varies = response.headers.get("Vary", "")
                varies = [x.strip() for x in varies.split(",") if x.strip()]
                if "Accept-Encoding" not in varies:
                    varies.append("Accept-Encoding")
                response.headers['Vary'] = ", ".join(varies)

                response.headers['Content-Encoding'] = 'gzip'
                response.body = compress(response.body, compress_level)
                if response.headers.has_key("Content-Length"):
                    # Delete Content-Length header so finalize() recalcs it.
                    del response.headers["Content-Length"]
            return
    cherrypy.HTTPError(406, "identity, gzip").set_response()