source file: /System/Library/Frameworks/Python.framework/Versions/2.3/lib/python2.3/httplib.py
file stats: 702 lines, 133 executed: 18.9% covered
   1. """HTTP/1.1 client library
   2. 
   3. <intro stuff goes here>
   4. <other stuff, too>
   5. 
   6. An HTTPConnection goes through a number of "states", which define when a
   7. client may legally make another request or fetch the response for a
   8. particular request. This diagram details these state transitions:
   9. 
  10.     (null)
  11.       |
  12.       | HTTPConnection()
  13.       v
  14.     Idle
  15.       |
  16.       | putrequest()
  17.       v
  18.     Request-started
  19.       |
  20.       | ( putheader() )*  endheaders()
  21.       v
  22.     Request-sent
  23.       |
  24.       | response = getresponse()
  25.       v
  26.     Unread-response   [Response-headers-read]
  27.       |\____________________
  28.       |                     |
  29.       | response.read()     | putrequest()
  30.       v                     v
  31.     Idle                  Req-started-unread-response
  32.                      ______/|
  33.                    /        |
  34.    response.read() |        | ( putheader() )*  endheaders()
  35.                    v        v
  36.        Request-started    Req-sent-unread-response
  37.                             |
  38.                             | response.read()
  39.                             v
  40.                           Request-sent
  41. 
  42. This diagram presents the following rules:
  43.   -- a second request may not be started until {response-headers-read}
  44.   -- a response [object] cannot be retrieved until {request-sent}
  45.   -- there is no differentiation between an unread response body and a
  46.      partially read response body
  47. 
  48. Note: this enforcement is applied by the HTTPConnection class. The
  49.       HTTPResponse class does not enforce this state machine, which
  50.       implies sophisticated clients may accelerate the request/response
  51.       pipeline. Caution should be taken, though: accelerating the states
  52.       beyond the above pattern may imply knowledge of the server's
  53.       connection-close behavior for certain requests. For example, it
  54.       is impossible to tell whether the server will close the connection
  55.       UNTIL the response headers have been read; this means that further
  56.       requests cannot be placed into the pipeline until it is known that
  57.       the server will NOT be closing the connection.
  58. 
  59. Logical State                  __state            __response
  60. -------------                  -------            ----------
  61. Idle                           _CS_IDLE           None
  62. Request-started                _CS_REQ_STARTED    None
  63. Request-sent                   _CS_REQ_SENT       None
  64. Unread-response                _CS_IDLE           <response_class>
  65. Req-started-unread-response    _CS_REQ_STARTED    <response_class>
  66. Req-sent-unread-response       _CS_REQ_SENT       <response_class>
  67. """
  68. 
  69. import errno
  70. import mimetools
  71. import socket
  72. from urlparse import urlsplit
  73. 
  74. try:
  75.     from cStringIO import StringIO
  76. except ImportError:
  77.     from StringIO import StringIO
  78. 
  79. __all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection",
  80.            "HTTPException", "NotConnected", "UnknownProtocol",
  81.            "UnknownTransferEncoding", "UnimplementedFileMode",
  82.            "IncompleteRead", "InvalidURL", "ImproperConnectionState",
  83.            "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
  84.            "BadStatusLine", "error"]    # the names exported by "import *"
  85. 
  86. HTTP_PORT = 80          # used as HTTPConnection.default_port
  87. HTTPS_PORT = 443
  88. 
  89. _UNKNOWN = 'UNKNOWN'    # initial value of HTTPResponse fields not yet parsed
  90. 
  91. # connection states: the values HTTPConnection keeps in its __state
  92. _CS_IDLE = 'Idle'                       # no request is in progress
  93. _CS_REQ_STARTED = 'Request-started'     # putrequest() called, headers open
  94. _CS_REQ_SENT = 'Request-sent'           # endheaders() called
  95. 
  96. class HTTPMessage(mimetools.Message):
  97. 
  98.     def addheader(self, key, value):
  99.         """Add header for field key handling repeats."""
 100.         prev = self.dict.get(key)
 101.         if prev is None:
 102.             self.dict[key] = value
 103.         else:
 104.             combined = ", ".join((prev, value))
 105.             self.dict[key] = combined
 106. 
 107.     def addcontinue(self, key, more):
 108.         """Add more field data from a continuation line."""
 109.         prev = self.dict[key]
 110.         self.dict[key] = prev + "\n " + more
 111. 
 112.     def readheaders(self):
 113.         """Read header lines.
 114. 
 115.         Read header lines up to the entirely blank line that terminates them.
 116.         The (normally blank) line that ends the headers is consumed, but not
 117.         stored in self.headers.  If a non-header line ends the headers
 118.         (which is an error), an attempt is made to back up over it; it is
 119.         never stored in self.headers.
 120. 
 121.         The variable self.status is set to the empty string if all went well,
 122.         otherwise it is an error message.  The variable self.headers is a
 123.         completely uninterpreted list of lines contained in the header (so
 124.         printing them will reproduce the header exactly as it appears in the
 125.         file).
 126. 
 127.         If multiple header fields with the same name occur, they are combined
 128.         in self.dict according to the rules in RFC 2616 sec 4.2:
 129. 
 130.         Appending each subsequent field-value to the first, each separated
 131.         by a comma. The order in which header fields with the same field-name
 132.         are received is significant to the interpretation of the combined
 133.         field value.
 134.         """
 135.         # XXX The implementation overrides the readheaders() method of
 136.         # rfc822.Message.  The base class design isn't amenable to
 137.         # customized behavior here so the method here is a copy of the
 138.         # base class code with a few small changes.
 139. 
 140.         self.dict = {}
 141.         self.unixfrom = ''
 142.         self.headers = hlist = []   # raw header lines, in order of arrival
 143.         self.status = ''
 144.         headerseen = ""     # key of the most recent header line, "" before any
 145.         firstline = 1       # cleared by the first line not starting with 'From '
 146.         startofline = unread = tell = None  # pushback helpers, chosen just below
 147.         if hasattr(self.fp, 'unread'):
 148.             unread = self.fp.unread
 149.         elif self.seekable:
 150.             tell = self.fp.tell
 151.         while True:
 152.             if tell:
 153.                 try:
 154.                     startofline = tell()
 155.                 except IOError:
 156.                     # tell() failed: give up on seek-based pushback for good
 157.                     startofline = tell = None
 158.                     self.seekable = 0
 159.             line = self.fp.readline()
 160.             if not line:
 161.                 self.status = 'EOF in headers'
 162.                 break
 163.             # Skip unix From name time lines
 164.             if firstline and line.startswith('From '):
 165.                 self.unixfrom = self.unixfrom + line
 166.                 continue
 167.             firstline = 0
 168.             if headerseen and line[0] in ' \t':
 169.                 # It's a continuation line: leading space or tab after a header.
 170.                 # XXX Not sure if continuation lines are handled properly
 171.                 # for http and/or for repeating headers
 172.                 hlist.append(line)
 173.                 self.addcontinue(headerseen, line.strip())
 174.                 continue
 175.             elif self.iscomment(line):
 176.                 # It's a comment.  Ignore it.
 177.                 continue
 178.             elif self.islast(line):
 179.                 # Note! No pushback here!  The delimiter line gets eaten.
 180.                 break
 181.             headerseen = self.isheader(line)
 182.             if headerseen:
 183.                 # It's a legal header line, save it.
 184.                 hlist.append(line)
 185.                 self.addheader(headerseen, line[len(headerseen)+1:].strip())
 186.                 continue
 187.             else:
 188.                 # It's not a header line; throw it back and stop here.
 189.                 if not self.dict:
 190.                     self.status = 'No headers'
 191.                 else:
 192.                     self.status = 'Non-header line where header expected'
 193.                 # Try to undo the read; note the failure if neither way works.
 194.                 if unread:
 195.                     unread(line)
 196.                 elif tell:
 197.                     self.fp.seek(startofline)
 198.                 else:
 199.                     self.status = self.status + '; bad seek'
 200.                 break
 200. 
 201. class HTTPResponse:
 202. 
 203.     # strict: If true, raise BadStatusLine if the status line can't be
 204.     # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
 205.     # false because it prevents clients from talking to HTTP/0.9
 206.     # servers.  Note that a response with a sufficiently corrupted
 207.     # status line will look like an HTTP/0.9 response.
 208. 
 209.     # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
 210. 
 211.     def __init__(self, sock, debuglevel=0, strict=0, method=None):
 212.         self.fp = sock.makefile('rb', 0)
 213.         self.debuglevel = debuglevel
 214.         self.strict = strict
 215.         self._method = method
 216. 
 217.         self.msg = None
 218. 
 219.         # from the Status-Line of the response
 220.         self.version = _UNKNOWN # HTTP-Version
 221.         self.status = _UNKNOWN  # Status-Code
 222.         self.reason = _UNKNOWN  # Reason-Phrase
 223. 
 224.         self.chunked = _UNKNOWN         # is "chunked" being used?
 225.         self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
 226.         self.length = _UNKNOWN          # number of bytes left in response
 227.         self.will_close = _UNKNOWN      # conn will close at end of response
 228. 
 229.     def _read_status(self):
 230.         # Initialize with Simple-Response defaults
 231.         line = self.fp.readline()
 232.         if self.debuglevel > 0:
 233.             print "reply:", repr(line)
 234.         if not line:
 235.             # Presumably, the server closed the connection before
 236.             # sending a valid response.
 237.             raise BadStatusLine(line)
 238.         try:
 239.             [version, status, reason] = line.split(None, 2)
 240.         except ValueError:
 241.             try:
 242.                 [version, status] = line.split(None, 1)
 243.                 reason = ""
 244.             except ValueError:
 245.                 # empty version will cause next test to fail and status
 246.                 # will be treated as 0.9 response.
 247.                 version = ""
 248.         if not version.startswith('HTTP/'):
 249.             if self.strict:
 250.                 self.close()
 251.                 raise BadStatusLine(line)
 252.             else:
 253.                 # assume it's a Simple-Response from an 0.9 server
 254.                 self.fp = LineAndFileWrapper(line, self.fp)
 255.                 return "HTTP/0.9", 200, ""
 256. 
 257.         # The status code is a three-digit number
 258.         try:
 259.             status = int(status)
 260.             if status < 100 or status > 999:
 261.                 raise BadStatusLine(line)
 262.         except ValueError:
 263.             raise BadStatusLine(line)
 264.         return version, status, reason
 265. 
 266.     def begin(self):    # read status line and headers; set the framing fields
 267.         if self.msg is not None:
 268.             # an earlier call already parsed the headers; nothing to do
 269.             return
 270. 
 271.         # read until we get a non-100 response
 272.         while True:
 273.             version, status, reason = self._read_status()
 274.             if status != 100:
 275.                 break
 276.             # skip the header lines of the 100 response, up to a blank line
 277.             while True:
 278.                 skip = self.fp.readline().strip()
 279.                 if not skip:
 280.                     break
 281.                 if self.debuglevel > 0:
 282.                     print "header:", skip
 283. 
 284.         self.status = status
 285.         self.reason = reason.strip()
 286.         if version == 'HTTP/1.0':
 287.             self.version = 10
 288.         elif version.startswith('HTTP/1.'):
 289.             self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
 290.         elif version == 'HTTP/0.9':
 291.             self.version = 9
 292.         else:
 293.             raise UnknownProtocol(version)
 294. 
 295.         if self.version == 9:
 296.             self.chunked = 0
 297.             self.will_close = 1
 298.             self.msg = HTTPMessage(StringIO())  # no headers: an empty message
 299.             return
 300. 
 301.         self.msg = HTTPMessage(self.fp, 0)  # NOTE(review): 0 presumably means "not seekable" -- confirm
 302.         if self.debuglevel > 0:
 303.             for hdr in self.msg.headers:
 304.                 print "header:", hdr,       # hdr still carries its own line ending
 305. 
 306.         # don't let the msg keep an fp
 307.         self.msg.fp = None
 308. 
 309.         # are we using the chunked-style of transfer encoding?
 310.         tr_enc = self.msg.getheader('transfer-encoding')
 311.         if tr_enc and tr_enc.lower() == "chunked":
 312.             self.chunked = 1
 313.             self.chunk_left = None  # makes _read_chunked() read a chunk-size line first
 314.         else:
 315.             self.chunked = 0
 316. 
 317.         # will the connection close at the end of the response?
 318.         self.will_close = self._check_close()
 319. 
 320.         # do we have a Content-Length?
 321.         # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
 322.         length = self.msg.getheader('content-length')
 323.         if length and not self.chunked:
 324.             try:
 325.                 self.length = int(length)
 326.             except ValueError:
 327.                 self.length = None  # a non-numeric value is treated as absent
 328.         else:
 329.             self.length = None
 330. 
 331.         # does the body have a fixed length? (of zero)
 332.         if (status == 204 or            # No Content
 333.             status == 304 or            # Not Modified
 334.             100 <= status < 200 or      # 1xx codes
 335.             self._method == 'HEAD'):    # response to a HEAD request
 336.             self.length = 0
 337. 
 338.         # if the connection remains open, and we aren't using chunked, and
 339.         # a content-length was not provided, then assume that the connection
 340.         # WILL close.
 341.         if not self.will_close and \
 342.            not self.chunked and \
 343.            self.length is None:
 344.             self.will_close = 1
 345. 
 346.     def _check_close(self):
 347.         if self.version == 11:
 348.             # An HTTP/1.1 proxy is assumed to stay open unless
 349.             # explicitly closed.
 350.             conn = self.msg.getheader('connection')
 351.             if conn and conn.lower().find("close") >= 0:
 352.                 return True
 353.             return False
 354. 
 355.         # An HTTP/1.0 response with a Connection header is probably
 356.         # the result of a confused proxy.  Ignore it.
 357. 
 358.         # For older HTTP, Keep-Alive indiciates persistent connection.
 359.         if self.msg.getheader('keep-alive'):
 360.             return False
 361. 
 362.         # Proxy-Connection is a netscape hack.
 363.         pconn = self.msg.getheader('proxy-connection')
 364.         if pconn and pconn.lower().find("keep-alive") >= 0:
 365.             return False
 366. 
 367.         # otherwise, assume it will close
 368.         return True
 369. 
 370.     def close(self):
 371.         if self.fp:             # a repeated call finds no file and does nothing
 372.             self.fp.close()
 373.             self.fp = None      # isclosed() tests for exactly this
 374. 
 375.     def isclosed(self):
 376.         # NOTE: it is possible that we will not ever call self.close(). This
 377.         #       case occurs when will_close is TRUE, length is None, and we
 378.         #       read up to the last byte, but NOT past it.
 379.         #
 380.         # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
 381.         #          called, meaning self.isclosed() is meaningful.
 382.         return self.fp is None
 383. 
 384.     def read(self, amt=None):
 385.         if self.fp is None:
 386.             return ''
 387. 
 388.         if self.chunked:
 389.             return self._read_chunked(amt)
 390. 
 391.         if amt is None:
 392.             # unbounded read
 393.             if self.will_close:
 394.                 s = self.fp.read()
 395.             else:
 396.                 s = self._safe_read(self.length)
 397.             self.close()        # we read everything
 398.             return s
 399. 
 400.         if self.length is not None:
 401.             if amt > self.length:
 402.                 # clip the read to the "end of response"
 403.                 amt = self.length
 404.             self.length -= amt
 405. 
 406.         # we do not use _safe_read() here because this may be a .will_close
 407.         # connection, and the user is reading more bytes than will be provided
 408.         # (for example, reading in 1k chunks)
 409.         s = self.fp.read(amt)
 410. 
 411.         return s
 412. 
 413.     def _read_chunked(self, amt):
 414.         assert self.chunked != _UNKNOWN
 415.         chunk_left = self.chunk_left
 416.         value = ''
 417. 
 418.         # XXX This accumulates chunks by repeated string concatenation,
 419.         # which is not efficient as the number or size of chunks gets big.
 420.         while True:
 421.             if chunk_left is None:
 422.                 line = self.fp.readline()
 423.                 i = line.find(';')
 424.                 if i >= 0:
 425.                     line = line[:i] # strip chunk-extensions
 426.                 chunk_left = int(line, 16)
 427.                 if chunk_left == 0:
 428.                     break
 429.             if amt is None:
 430.                 value += self._safe_read(chunk_left)
 431.             elif amt < chunk_left:
 432.                 value += self._safe_read(amt)
 433.                 self.chunk_left = chunk_left - amt
 434.                 return value
 435.             elif amt == chunk_left:
 436.                 value += self._safe_read(amt)
 437.                 self._safe_read(2)  # toss the CRLF at the end of the chunk
 438.                 self.chunk_left = None
 439.                 return value
 440.             else:
 441.                 value += self._safe_read(chunk_left)
 442.                 amt -= chunk_left
 443. 
 444.             # we read the whole chunk, get another
 445.             self._safe_read(2)      # toss the CRLF at the end of the chunk
 446.             chunk_left = None
 447. 
 448.         # read and discard trailer up to the CRLF terminator
 449.         ### note: we shouldn't have any trailers!
 450.         while True:
 451.             line = self.fp.readline()
 452.             if line == '\r\n':
 453.                 break
 454. 
 455.         # we read everything; close the "file"
 456.         self.close()
 457. 
 458.         return value
 459. 
 460.     def _safe_read(self, amt):
 461.         """Read the number of bytes requested, compensating for partial reads.
 462. 
 463.         Normally, we have a blocking socket, but a read() can be interrupted
 464.         by a signal (resulting in a partial read).
 465. 
 466.         Note that we cannot distinguish between EOF and an interrupt when zero
 467.         bytes have been read. IncompleteRead() will be raised in this
 468.         situation.
 469. 
 470.         This function should be used when <amt> bytes "should" be present for
 471.         reading. If the bytes are truly not available (due to EOF), then the
 472.         IncompleteRead exception can be used to detect the problem.
 473.         """
 474.         s = ''
 475.         while amt > 0:
 476.             chunk = self.fp.read(amt)
 477.             if not chunk:
 478.                 raise IncompleteRead(s)
 479.             s += chunk
 480.             amt -= len(chunk)
 481.         return s
 482. 
 483.     def getheader(self, name, default=None):
 484.         if self.msg is None:
 485.             raise ResponseNotReady()
 486.         return self.msg.getheader(name, default)
 487. 
 488. 
 489. class HTTPConnection:
 490. 
 491.     _http_vsn = 11
 492.     _http_vsn_str = 'HTTP/1.1'
 493. 
 494.     response_class = HTTPResponse
 495.     default_port = HTTP_PORT
 496.     auto_open = 1
 497.     debuglevel = 0
 498.     strict = 0
 499. 
 500.     def __init__(self, host, port=None, strict=None):
 501.         self.sock = None
 502.         self._buffer = []
 503.         self.__response = None
 504.         self.__state = _CS_IDLE
 505.         self._method = None
 506. 
 507.         self._set_hostport(host, port)
 508.         if strict is not None:
 509.             self.strict = strict
 510. 
 511.     def _set_hostport(self, host, port):
 512.         if port is None:
 513.             i = host.rfind(':')
 514.             j = host.rfind(']')         # ipv6 addresses have [...]
 515.             if i > j:
 516.                 try:
 517.                     port = int(host[i+1:])
 518.                 except ValueError:
 519.                     raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
 520.                 host = host[:i]
 521.             else:
 522.                 port = self.default_port
 523.             if host[0] == '[' and host[-1] == ']':
 524.                 host = host[1:-1]
 525.         self.host = host
 526.         self.port = port
 527. 
 528.     def set_debuglevel(self, level):
 529.         self.debuglevel = level     # values > 0 switch on the debug print statements
 530. 
 531.     def connect(self):
 532.         """Connect to self.host/self.port, trying each getaddrinfo() result in turn."""
 533.         msg = "getaddrinfo returns an empty list"   # reported if the loop never runs
 534.         for res in socket.getaddrinfo(self.host, self.port, 0,
 535.                                       socket.SOCK_STREAM):
 536.             af, socktype, proto, canonname, sa = res
 537.             try:
 538.                 self.sock = socket.socket(af, socktype, proto)
 539.                 if self.debuglevel > 0:
 540.                     print "connect: (%s, %s)" % (self.host, self.port)
 541.                 self.sock.connect(sa)
 542.             except socket.error, msg:       # rebinds msg to the latest failure
 543.                 if self.debuglevel > 0:
 544.                     print 'connect fail:', (self.host, self.port)
 545.                 if self.sock:
 546.                     self.sock.close()
 547.                 self.sock = None
 548.                 continue                    # try the next address
 549.             break                           # connected: stop at the first success
 550.         if not self.sock:
 551.             raise socket.error, msg         # nothing worked: raise the last error seen
 552. 
 553.     def close(self):
 554.         """Close the socket and any retained response; reset the state to idle."""
 555.         if self.sock:
 556.             self.sock.close()   # close it manually... there may be other refs
 557.             self.sock = None    # send() connects again when it finds None and auto_open is set
 558.         if self.__response:
 559.             self.__response.close()
 560.             self.__response = None
 561.         self.__state = _CS_IDLE # putrequest() is accepted again
 562. 
 563.     def send(self, str):
 564.         """Send `str' to the server."""
 565.         if self.sock is None:
 566.             if self.auto_open:
 567.                 self.connect()
 568.             else:
 569.                 raise NotConnected()
 570. 
 571.         # send the data to the server. if we get a broken pipe, then close
 572.         # the socket. we want to reconnect when somebody tries to send again.
 573.         #
 574.         # NOTE: we DO propagate the error, though, because we cannot simply
 575.         #       ignore the error... the caller will know if they can retry.
 576.         if self.debuglevel > 0:
 577.             print "send:", repr(str)
 578.         try:
 579.             self.sock.sendall(str)
 580.         except socket.error, v:
 581.             if v[0] == 32:      # Broken pipe
 582.                 self.close()
 583.             raise
 584. 
 585.     def _output(self, s):
 586.         """Add a line of output to the current request buffer.
 587. 
 588.         Assumes that the line does *not* end with \\r\\n.
 589.         """
 590.         self._buffer.append(s)
 591. 
 592.     def _send_output(self):
 593.         """Send the currently buffered request and clear the buffer.
 594. 
 595.         Appends an extra \\r\\n to the buffer.
 596.         """
 597.         self._buffer.extend(("", ""))
 598.         msg = "\r\n".join(self._buffer)
 599.         del self._buffer[:]
 600.         self.send(msg)
 601. 
 602.     def putrequest(self, method, url, skip_host=0):
 603.         """Send a request to the server.
 604. 
 605.         `method' specifies an HTTP request method, e.g. 'GET'.
 606.         `url' specifies the object being requested, e.g. '/index.html'.
 607.         """
 608. 
 609.         # if a prior response has been completed, then forget about it.
 610.         if self.__response and self.__response.isclosed():
 611.             self.__response = None
 612. 
 613.         #
 614.         # in certain cases, we cannot issue another request on this connection.
 615.         # this occurs when:
 616.         #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
 617.         #   2) a response to a previous request has signalled that it is going
 618.         #      to close the connection upon completion.
 619.         #   3) the headers for the previous response have not been read, thus
 620.         #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
 621.         #
 622.         # if there is no prior response, then we can request at will.
 623.         #
 624.         # if point (2) is true, then we will have passed the socket to the
 625.         # response (effectively meaning, "there is no prior response"), and
 626.         # will open a new one when a new request is made.
 627.         #
 628.         # Note: if a prior response exists, then we *can* start a new request.
 629.         #       We are not allowed to begin fetching the response to this new
 630.         #       request, however, until that prior response is complete.
 631.         #
 632.         if self.__state == _CS_IDLE:
 633.             self.__state = _CS_REQ_STARTED
 634.         else:
 635.             raise CannotSendRequest()
 636. 
 637.         # Save the method we use, we need it later in the response phase
 638.         self._method = method
 639.         if not url:
 640.             url = '/'
 641.         str = '%s %s %s' % (method, url, self._http_vsn_str)
 642. 
 643.         self._output(str)
 644. 
 645.         if self._http_vsn == 11:
 646.             # Issue some standard headers for better HTTP/1.1 compliance
 647. 
 648.             if not skip_host:
 649.                 # this header is issued *only* for HTTP/1.1
 650.                 # connections. more specifically, this means it is
 651.                 # only issued when the client uses the new
 652.                 # HTTPConnection() class. backwards-compat clients
 653.                 # will be using HTTP/1.0 and those clients may be
 654.                 # issuing this header themselves. we should NOT issue
 655.                 # it twice; some web servers (such as Apache) barf
 656.                 # when they see two Host: headers
 657. 
 658.                 # If we need a non-standard port,include it in the
 659.                 # header.  If the request is going through a proxy,
 660.                 # but the host of the actual URL, not the host of the
 661.                 # proxy.
 662. 
 663.                 netloc = ''
 664.                 if url.startswith('http'):
 665.                     nil, netloc, nil, nil, nil = urlsplit(url)
 666. 
 667.                 if netloc:
 668.                     self.putheader('Host', netloc.encode("idna"))
 669.                 elif self.port == HTTP_PORT:
 670.                     self.putheader('Host', self.host.encode("idna"))
 671.                 else:
 672.                     self.putheader('Host', "%s:%s" % (self.host.encode("idna"), self.port))
 673. 
 674.             # note: we are assuming that clients will not attempt to set these
 675.             #       headers since *this* library must deal with the
 676.             #       consequences. this also means that when the supporting
 677.             #       libraries are updated to recognize other forms, then this
 678.             #       code should be changed (removed or updated).
 679. 
 680.             # we only want a Content-Encoding of "identity" since we don't
 681.             # support encodings such as x-gzip or x-deflate.
 682.             self.putheader('Accept-Encoding', 'identity')
 683. 
 684.             # we can accept "chunked" Transfer-Encodings, but no others
 685.             # NOTE: no TE header implies *only* "chunked"
 686.             #self.putheader('TE', 'chunked')
 687. 
 688.             # if TE is supplied in the header, then it must appear in a
 689.             # Connection header.
 690.             #self.putheader('Connection', 'TE')
 691. 
 692.         else:
 693.             # For HTTP/1.0, the server will assume "not chunked"
 694.             pass
 695. 
 696.     def putheader(self, header, value):
 697.         """Send a request header line to the server.
 698. 
 699.         For example: h.putheader('Accept', 'text/html')
 700.         """
 701.         if self.__state != _CS_REQ_STARTED:
 702.             raise CannotSendHeader()
 703. 
 704.         str = '%s: %s' % (header, value)
 705.         self._output(str)
 706. 
 707.     def endheaders(self):
 708.         """Indicate that the last header line has been sent to the server."""
 709. 
 710.         if self.__state == _CS_REQ_STARTED:
 711.             self.__state = _CS_REQ_SENT
 712.         else:
 713.             raise CannotSendHeader()
 714. 
 715.         self._send_output()
 716. 
 717.     def request(self, method, url, body=None, headers={}):
 718.         """Send a complete request to the server."""
 719. 
 720.         try:
 721.             self._send_request(method, url, body, headers)
 722.         except socket.error, v:
 723.             # trap 'Broken pipe' if we're allowed to automatically reconnect
 724.             if v[0] != 32 or not self.auto_open:
 725.                 raise
 726.             # try one more time
 727.             self._send_request(method, url, body, headers)
 728. 
 729.     def _send_request(self, method, url, body, headers):
 730.         # If headers already contains a host header, then define the
 731.         # optional skip_host argument to putrequest().  The check is
 732.         # more delicate because field names are case insensitive.
 733.         if 'host' in [k.lower() for k in headers]:
 734.             self.putrequest(method, url, skip_host=1)
 735.         else:
 736.             self.putrequest(method, url)
 737. 
 738.         if body:
 739.             self.putheader('Content-Length', str(len(body)))
 740.         for hdr, value in headers.iteritems():
 741.             self.putheader(hdr, value)
 742.         self.endheaders()
 743. 
 744.         if body:
 745.             self.send(body)
 746. 
 747.     def getresponse(self):
 748.         "Get the response from the server."
 749. 
 750.         # if a prior response has been completed, then forget about it.
 751.         if self.__response and self.__response.isclosed():
 752.             self.__response = None
 753. 
 754.         #
 755.         # if a prior response exists, then it must be completed (otherwise, we
 756.         # cannot read this response's header to determine the connection-close
 757.         # behavior)
 758.         #
 759.         # note: if a prior response existed, but was connection-close, then the
 760.         # socket and response were made independent of this HTTPConnection
 761.         # object since a new request requires that we open a whole new
 762.         # connection
 763.         #
 764.         # this means the prior response had one of two states:
 765.         #   1) will_close: this connection was reset and the prior socket and
 766.         #                  response operate independently
 767.         #   2) persistent: the response was retained and we await its
 768.         #                  isclosed() status to become true.
 769.         #
 770.         if self.__state != _CS_REQ_SENT or self.__response:
 771.             raise ResponseNotReady()
 772. 
 773.         if self.debuglevel > 0:
 774.             response = self.response_class(self.sock, self.debuglevel,
 775.                                            strict=self.strict,
 776.                                            method=self._method)
 777.         else:
 778.             response = self.response_class(self.sock, strict=self.strict,
 779.                                            method=self._method)
 780. 
 781.         response.begin()
 782.         assert response.will_close != _UNKNOWN
 783.         self.__state = _CS_IDLE
 784. 
 785.         if response.will_close:
 786.             # this effectively passes the connection to the response
 787.             self.close()
 788.         else:
 789.             # remember this, so we can tell when it is complete
 790.             self.__response = response
 791. 
 792.         return response
 793. 
# The next several classes are used to define FakeSocket, a socket-like
# interface to an SSL connection.
 796. 
 797. # The primary complexity comes from faking a makefile() method.  The
 798. # standard socket makefile() implementation calls dup() on the socket
 799. # file descriptor.  As a consequence, clients can call close() on the
 800. # parent socket and its makefile children in any order.  The underlying
 801. # socket isn't closed until they are all closed.
 802. 
# The implementation uses reference counting to keep the socket open
# until the last client calls close().  SharedSocket keeps track of
# the reference counting and SharedSocketClient provides a constructor
# and close() method that call incref() and decref() correctly.
 807. 
 808. class SharedSocket:
 809. 
 810.     def __init__(self, sock):
 811.         self.sock = sock
 812.         self._refcnt = 0
 813. 
 814.     def incref(self):
 815.         self._refcnt += 1
 816. 
 817.     def decref(self):
 818.         self._refcnt -= 1
 819.         assert self._refcnt >= 0
 820.         if self._refcnt == 0:
 821.             self.sock.close()
 822. 
 823.     def __del__(self):
 824.         self.sock.close()
 825. 
 826. class SharedSocketClient:
 827. 
 828.     def __init__(self, shared):
 829.         self._closed = 0
 830.         self._shared = shared
 831.         self._shared.incref()
 832.         self._sock = shared.sock
 833. 
 834.     def close(self):
 835.         if not self._closed:
 836.             self._shared.decref()
 837.             self._closed = 1
 838.             self._shared = None
 839. 
 840. class SSLFile(SharedSocketClient):
 841.     """File-like object wrapping an SSL socket."""
 842. 
 843.     BUFSIZE = 8192
 844. 
 845.     def __init__(self, sock, ssl, bufsize=None):
 846.         SharedSocketClient.__init__(self, sock)
 847.         self._ssl = ssl
 848.         self._buf = ''
 849.         self._bufsize = bufsize or self.__class__.BUFSIZE
 850. 
 851.     def _read(self):
 852.         buf = ''
 853.         # put in a loop so that we retry on transient errors
 854.         while True:
 855.             try:
 856.                 buf = self._ssl.read(self._bufsize)
 857.             except socket.sslerror, err:
 858.                 if (err[0] == socket.SSL_ERROR_WANT_READ
 859.                     or err[0] == socket.SSL_ERROR_WANT_WRITE):
 860.                     continue
 861.                 if (err[0] == socket.SSL_ERROR_ZERO_RETURN
 862.                     or err[0] == socket.SSL_ERROR_EOF):
 863.                     break
 864.                 raise
 865.             except socket.error, err:
 866.                 if err[0] == errno.EINTR:
 867.                     continue
 868.                 if err[0] == errno.EBADF:
 869.                     # XXX socket was closed?
 870.                     break
 871.                 raise
 872.             else:
 873.                 break
 874.         return buf
 875. 
 876.     def read(self, size=None):
 877.         L = [self._buf]
 878.         avail = len(self._buf)
 879.         while size is None or avail < size:
 880.             s = self._read()
 881.             if s == '':
 882.                 break
 883.             L.append(s)
 884.             avail += len(s)
 885.         all = "".join(L)
 886.         if size is None:
 887.             self._buf = ''
 888.             return all
 889.         else:
 890.             self._buf = all[size:]
 891.             return all[:size]
 892. 
 893.     def readline(self):
 894.         L = [self._buf]
 895.         self._buf = ''
 896.         while 1:
 897.             i = L[-1].find("\n")
 898.             if i >= 0:
 899.                 break
 900.             s = self._read()
 901.             if s == '':
 902.                 break
 903.             L.append(s)
 904.         if i == -1:
 905.             # loop exited because there is no more data
 906.             return "".join(L)
 907.         else:
 908.             all = "".join(L)
 909.             # XXX could do enough bookkeeping not to do a 2nd search
 910.             i = all.find("\n") + 1
 911.             line = all[:i]
 912.             self._buf = all[i:]
 913.             return line
 914. 
 915.     def readlines(self, sizehint=0):
 916.         total = 0
 917.         list = []
 918.         while True:
 919.             line = self.readline()
 920.             if not line:
 921.                 break
 922.             list.append(line)
 923.             total += len(line)
 924.             if sizehint and total >= sizehint:
 925.                 break
 926.         return list
 927. 
 928.     def fileno(self):
 929.         return self._sock.fileno()
 930. 
 931.     def __iter__(self):
 932.         return self
 933. 
 934.     def next(self):
 935.         line = self.readline()
 936.         if not line:
 937.             raise StopIteration
 938.         return line
 939. 
 940. class FakeSocket(SharedSocketClient):
 941. 
 942.     class _closedsocket:
 943.         def __getattr__(self, name):
 944.             raise error(9, 'Bad file descriptor')
 945. 
 946.     def __init__(self, sock, ssl):
 947.         sock = SharedSocket(sock)
 948.         SharedSocketClient.__init__(self, sock)
 949.         self._ssl = ssl
 950. 
 951.     def close(self):
 952.         SharedSocketClient.close(self)
 953.         self._sock = self.__class__._closedsocket()
 954. 
 955.     def makefile(self, mode, bufsize=None):
 956.         if mode != 'r' and mode != 'rb':
 957.             raise UnimplementedFileMode()
 958.         return SSLFile(self._shared, self._ssl, bufsize)
 959. 
 960.     def send(self, stuff, flags = 0):
 961.         return self._ssl.write(stuff)
 962. 
 963.     sendall = send
 964. 
 965.     def recv(self, len = 1024, flags = 0):
 966.         return self._ssl.read(len)
 967. 
 968.     def __getattr__(self, attr):
 969.         return getattr(self._sock, attr)
 970. 
 971. 
 972. class HTTPSConnection(HTTPConnection):
 973.     "This class allows communication via SSL."
 974. 
 975.     default_port = HTTPS_PORT
 976. 
 977.     def __init__(self, host, port=None, key_file=None, cert_file=None,
 978.                  strict=None):
 979.         HTTPConnection.__init__(self, host, port, strict)
 980.         self.key_file = key_file
 981.         self.cert_file = cert_file
 982. 
 983.     def connect(self):
 984.         "Connect to a host on a given (SSL) port."
 985. 
 986.         sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 987.         sock.connect((self.host, self.port))
 988.         ssl = socket.ssl(sock, self.key_file, self.cert_file)
 989.         self.sock = FakeSocket(sock, ssl)
 990. 
 991. 
 992. class HTTP:
 993.     "Compatibility class with httplib.py from 1.5."
 994. 
 995.     _http_vsn = 10
 996.     _http_vsn_str = 'HTTP/1.0'
 997. 
 998.     debuglevel = 0
 999. 
1000.     _connection_class = HTTPConnection
1001. 
1002.     def __init__(self, host='', port=None, strict=None):
1003.         "Provide a default host, since the superclass requires one."
1004. 
1005.         # some joker passed 0 explicitly, meaning default port
1006.         if port == 0:
1007.             port = None
1008. 
1009.         # Note that we may pass an empty string as the host; this will throw
1010.         # an error when we attempt to connect. Presumably, the client code
1011.         # will call connect before then, with a proper host.
1012.         self._setup(self._connection_class(host, port, strict))
1013. 
1014.     def _setup(self, conn):
1015.         self._conn = conn
1016. 
1017.         # set up delegation to flesh out interface
1018.         self.send = conn.send
1019.         self.putrequest = conn.putrequest
1020.         self.endheaders = conn.endheaders
1021.         self.set_debuglevel = conn.set_debuglevel
1022. 
1023.         conn._http_vsn = self._http_vsn
1024.         conn._http_vsn_str = self._http_vsn_str
1025. 
1026.         self.file = None
1027. 
1028.     def connect(self, host=None, port=None):
1029.         "Accept arguments to set the host/port, since the superclass doesn't."
1030. 
1031.         if host is not None:
1032.             self._conn._set_hostport(host, port)
1033.         self._conn.connect()
1034. 
1035.     def getfile(self):
1036.         "Provide a getfile, since the superclass' does not use this concept."
1037.         return self.file
1038. 
1039.     def putheader(self, header, *values):
1040.         "The superclass allows only one value argument."
1041.         self._conn.putheader(header, '\r\n\t'.join(values))
1042. 
1043.     def getreply(self):
1044.         """Compat definition since superclass does not define it.
1045. 
1046.         Returns a tuple consisting of:
1047.         - server status code (e.g. '200' if all goes well)
1048.         - server "reason" corresponding to status code
1049.         - any RFC822 headers in the response from the server
1050.         """
1051.         try:
1052.             response = self._conn.getresponse()
1053.         except BadStatusLine, e:
1054.             ### hmm. if getresponse() ever closes the socket on a bad request,
1055.             ### then we are going to have problems with self.sock
1056. 
1057.             ### should we keep this behavior? do people use it?
1058.             # keep the socket open (as a file), and return it
1059.             self.file = self._conn.sock.makefile('rb', 0)
1060. 
1061.             # close our socket -- we want to restart after any protocol error
1062.             self.close()
1063. 
1064.             self.headers = None
1065.             return -1, e.line, None
1066. 
1067.         self.headers = response.msg
1068.         self.file = response.fp
1069.         return response.status, response.reason, response.msg
1070. 
1071.     def close(self):
1072.         self._conn.close()
1073. 
1074.         # note that self.file == response.fp, which gets closed by the
1075.         # superclass. just clear the object ref here.
1076.         ### hmm. messy. if status==-1, then self.file is owned by us.
1077.         ### well... we aren't explicitly closing, but losing this ref will
1078.         ### do it
1079.         self.file = None
1080. 
if hasattr(socket, 'ssl'):
    class HTTPS(HTTP):
        """The 1.5-style HTTP interface, carried over an SSL connection.

        Python 1.5.2 had no HTTPS class of its own, but the request
        interface it defined for http is just as useful for https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            # Supply a default host and pass the X509 key/cert file names
            # on to the connection.

            # urf. compensate for bad input.
            if port == 0:
                port = None
            conn = self._connection_class(host, port, key_file, cert_file,
                                          strict)
            self._setup(conn)

            # Nothing here reads these two; they are kept only for
            # compatibility with post-1.5.2 CVS.
            self.key_file = key_file
            self.cert_file = cert_file
1106. 
1107. 
class HTTPException(Exception):
    """Common base class of the exceptions defined in this module."""
    # A subclass with its own __init__ has to run Exception.__init__ or
    # assign self.args itself; str() fails otherwise.

class NotConnected(HTTPException):
    pass

class InvalidURL(HTTPException):
    pass

class UnknownProtocol(HTTPException):
    """Carries the offending protocol version in .version."""
    def __init__(self, version):
        HTTPException.__init__(self, version)
        self.version = version

class UnknownTransferEncoding(HTTPException):
    pass

class UnimplementedFileMode(HTTPException):
    """Raised by FakeSocket.makefile() for modes other than 'r' and 'rb'."""

class IncompleteRead(HTTPException):
    """Carries the data that was read in .partial."""
    def __init__(self, partial):
        HTTPException.__init__(self, partial)
        self.partial = partial

class ImproperConnectionState(HTTPException):
    """Base class for errors about the connection's request state."""

class CannotSendRequest(ImproperConnectionState):
    pass

class CannotSendHeader(ImproperConnectionState):
    pass

class ResponseNotReady(ImproperConnectionState):
    pass

class BadStatusLine(HTTPException):
    """Carries the unparsable status line in .line."""
    def __init__(self, line):
        HTTPException.__init__(self, line)
        self.line = line

# for backwards compatibility
error = HTTPException
1154. 
class LineAndFileWrapper:
    """File-like object that replays one line before reading a file.

    Intended for HTTP/0.9 responses.
    """

    # Status-line parsing calls readline() expecting the HTTP status
    # line.  In a 0.9 response that line is really the start of the
    # body, so the client must be given a readable object that still
    # contains it.

    def __init__(self, line, file):
        self._file = file
        self._line = line
        self._line_offset = 0
        self._line_left = len(line)
        self._line_consumed = 0

    def __getattr__(self, attr):
        # anything not defined here comes from the underlying file
        return getattr(self._file, attr)

    def _done(self):
        # Called once the last byte of the line has been handed out.
        # From then on the read methods are the underlying file's own.
        self._line_consumed = 1
        for name in ('read', 'readline', 'readlines'):
            setattr(self, name, getattr(self._file, name))

    def read(self, amt=None):
        """Read amt bytes (everything if None), the saved line first."""
        assert not self._line_consumed and self._line_left
        if amt is not None and amt <= self._line_left:
            # the request can be served from the saved line alone
            start = self._line_offset
            stop = start + amt
            self._line_offset = stop
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return self._line[start:stop]

        # the rest of the line is used up; top up from the file
        rest = self._line[self._line_offset:]
        self._done()
        if amt is None:
            return rest + self._file.read()
        return rest + self._file.read(amt - len(rest))

    def readline(self):
        """Return what is left of the saved line."""
        rest = self._line[self._line_offset:]
        self._done()
        return rest

    def readlines(self, size=None):
        """Return the rest of the saved line plus the file's lines."""
        first = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return first + self._file.readlines()
        return first + self._file.readlines(size)
1214. 
1215. def test():
1216.     """Test this module.
1217. 
1218.     A hodge podge of tests collected here, because they have too many
1219.     external dependencies for the regular test suite.
1220.     """
1221. 
1222.     import sys
1223.     import getopt
1224.     opts, args = getopt.getopt(sys.argv[1:], 'd')
1225.     dl = 0
1226.     for o, a in opts:
1227.         if o == '-d': dl = dl + 1
1228.     host = 'www.python.org'
1229.     selector = '/'
1230.     if args[0:]: host = args[0]
1231.     if args[1:]: selector = args[1]
1232.     h = HTTP()
1233.     h.set_debuglevel(dl)
1234.     h.connect(host)
1235.     h.putrequest('GET', selector)
1236.     h.endheaders()
1237.     status, reason, headers = h.getreply()
1238.     print 'status =', status
1239.     print 'reason =', reason
1240.     print "read", len(h.getfile().read())
1241.     print
1242.     if headers:
1243.         for header in headers.headers: print header.strip()
1244.     print
1245. 
1246.     # minimal test that code to extract host from url works
1247.     class HTTP11(HTTP):
1248.         _http_vsn = 11
1249.         _http_vsn_str = 'HTTP/1.1'
1250. 
1251.     h = HTTP11('www.python.org')
1252.     h.putrequest('GET', 'http://www.python.org/~jeremy/')
1253.     h.endheaders()
1254.     h.getreply()
1255.     h.close()
1256. 
1257.     if hasattr(socket, 'ssl'):
1258. 
1259.         for host, selector in (('sourceforge.net', '/projects/python'),
1260.                                ):
1261.             print "https://%s%s" % (host, selector)
1262.             hs = HTTPS()
1263.             hs.set_debuglevel(dl)
1264.             hs.connect(host)
1265.             hs.putrequest('GET', selector)
1266.             hs.endheaders()
1267.             status, reason, headers = hs.getreply()
1268.             print 'status =', status
1269.             print 'reason =', reason
1270.             print "read", len(hs.getfile().read())
1271.             print
1272.             if headers:
1273.                 for header in headers.headers: print header.strip()
1274.             print
1275. 
# Run the ad-hoc checks in test() only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    test()