source file: /System/Library/Frameworks/Python.framework/Versions/2.3/lib/python2.3/httplib.py
file stats: 702 lines, 133 executed: 18.9% covered
1. """HTTP/1.1 client library 2. 3. <intro stuff goes here> 4. <other stuff, too> 5. 6. HTTPConnection goes through a number of "states", which define when a client 7. may legally make another request or fetch the response for a particular 8. request. This diagram details these state transitions: 9. 10. (null) 11. | 12. | HTTPConnection() 13. v 14. Idle 15. | 16. | putrequest() 17. v 18. Request-started 19. | 20. | ( putheader() )* endheaders() 21. v 22. Request-sent 23. | 24. | response = getresponse() 25. v 26. Unread-response [Response-headers-read] 27. |\____________________ 28. | | 29. | response.read() | putrequest() 30. v v 31. Idle Req-started-unread-response 32. ______/| 33. / | 34. response.read() | | ( putheader() )* endheaders() 35. v v 36. Request-started Req-sent-unread-response 37. | 38. | response.read() 39. v 40. Request-sent 41. 42. This diagram presents the following rules: 43. -- a second request may not be started until {response-headers-read} 44. -- a response [object] cannot be retrieved until {request-sent} 45. -- there is no differentiation between an unread response body and a 46. partially read response body 47. 48. Note: this enforcement is applied by the HTTPConnection class. The 49. HTTPResponse class does not enforce this state machine, which 50. implies sophisticated clients may accelerate the request/response 51. pipeline. Caution should be taken, though: accelerating the states 52. beyond the above pattern may imply knowledge of the server's 53. connection-close behavior for certain requests. For example, it 54. is impossible to tell whether the server will close the connection 55. UNTIL the response headers have been read; this means that further 56. requests cannot be placed into the pipeline until it is known that 57. the server will NOT be closing the connection. 58. 59. Logical State __state __response 60. ------------- ------- ---------- 61. Idle _CS_IDLE None 62. Request-started _CS_REQ_STARTED None 63. 
Request-sent _CS_REQ_SENT None 64. Unread-response _CS_IDLE <response_class> 65. Req-started-unread-response _CS_REQ_STARTED <response_class> 66. Req-sent-unread-response _CS_REQ_SENT <response_class> 67. """ 68. 69. import errno 70. import mimetools 71. import socket 72. from urlparse import urlsplit 73. 74. try: 75. from cStringIO import StringIO 76. except ImportError: 77. from StringIO import StringIO 78. 79. __all__ = ["HTTP", "HTTPResponse", "HTTPConnection", "HTTPSConnection", 80. "HTTPException", "NotConnected", "UnknownProtocol", 81. "UnknownTransferEncoding", "UnimplementedFileMode", 82. "IncompleteRead", "InvalidURL", "ImproperConnectionState", 83. "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", 84. "BadStatusLine", "error"] 85. 86. HTTP_PORT = 80 87. HTTPS_PORT = 443 88. 89. _UNKNOWN = 'UNKNOWN' 90. 91. # connection states 92. _CS_IDLE = 'Idle' 93. _CS_REQ_STARTED = 'Request-started' 94. _CS_REQ_SENT = 'Request-sent' 95. 96. class HTTPMessage(mimetools.Message): 97. 98. def addheader(self, key, value): 99. """Add header for field key handling repeats.""" 100. prev = self.dict.get(key) 101. if prev is None: 102. self.dict[key] = value 103. else: 104. combined = ", ".join((prev, value)) 105. self.dict[key] = combined 106. 107. def addcontinue(self, key, more): 108. """Add more field data from a continuation line.""" 109. prev = self.dict[key] 110. self.dict[key] = prev + "\n " + more 111. 112. def readheaders(self): 113. """Read header lines. 114. 115. Read header lines up to the entirely blank line that terminates them. 116. The (normally blank) line that ends the headers is skipped, but not 117. included in the returned list. If a non-header line ends the headers, 118. (which is an error), an attempt is made to backspace over it; it is 119. never included in the returned list. 120. 121. The variable self.status is set to the empty string if all went well, 122. otherwise it is an error message. The variable self.headers is a 123. 
completely uninterpreted list of lines contained in the header (so 124. printing them will reproduce the header exactly as it appears in the 125. file). 126. 127. If multiple header fields with the same name occur, they are combined 128. according to the rules in RFC 2616 sec 4.2: 129. 130. Appending each subsequent field-value to the first, each separated 131. by a comma. The order in which header fields with the same field-name 132. are received is significant to the interpretation of the combined 133. field value. 134. """ 135. # XXX The implementation overrides the readheaders() method of 136. # rfc822.Message. The base class design isn't amenable to 137. # customized behavior here so the method here is a copy of the 138. # base class code with a few small changes. 139. 140. self.dict = {} 141. self.unixfrom = '' 142. self.headers = hlist = [] 143. self.status = '' 144. headerseen = "" 145. firstline = 1 146. startofline = unread = tell = None 147. if hasattr(self.fp, 'unread'): 148. unread = self.fp.unread 149. elif self.seekable: 150. tell = self.fp.tell 151. while True: 152. if tell: 153. try: 154. startofline = tell() 155. except IOError: 156. startofline = tell = None 157. self.seekable = 0 158. line = self.fp.readline() 159. if not line: 160. self.status = 'EOF in headers' 161. break 162. # Skip unix From name time lines 163. if firstline and line.startswith('From '): 164. self.unixfrom = self.unixfrom + line 165. continue 166. firstline = 0 167. if headerseen and line[0] in ' \t': 168. # XXX Not sure if continuation lines are handled properly 169. # for http and/or for repeating headers 170. # It's a continuation line. 171. hlist.append(line) 172. self.addcontinue(headerseen, line.strip()) 173. continue 174. elif self.iscomment(line): 175. # It's a comment. Ignore it. 176. continue 177. elif self.islast(line): 178. # Note! No pushback here! The delimiter line gets eaten. 179. break 180. headerseen = self.isheader(line) 181. if headerseen: 182. 
# It's a legal header line, save it. 183. hlist.append(line) 184. self.addheader(headerseen, line[len(headerseen)+1:].strip()) 185. continue 186. else: 187. # It's not a header line; throw it back and stop here. 188. if not self.dict: 189. self.status = 'No headers' 190. else: 191. self.status = 'Non-header line where header expected' 192. # Try to undo the read. 193. if unread: 194. unread(line) 195. elif tell: 196. self.fp.seek(startofline) 197. else: 198. self.status = self.status + '; bad seek' 199. break 200. 201. class HTTPResponse: 202. 203. # strict: If true, raise BadStatusLine if the status line can't be 204. # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is 205. # false because it prevents clients from talking to HTTP/0.9 206. # servers. Note that a response with a sufficiently corrupted 207. # status line will look like an HTTP/0.9 response. 208. 209. # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. 210. 211. def __init__(self, sock, debuglevel=0, strict=0, method=None): 212. self.fp = sock.makefile('rb', 0) 213. self.debuglevel = debuglevel 214. self.strict = strict 215. self._method = method 216. 217. self.msg = None 218. 219. # from the Status-Line of the response 220. self.version = _UNKNOWN # HTTP-Version 221. self.status = _UNKNOWN # Status-Code 222. self.reason = _UNKNOWN # Reason-Phrase 223. 224. self.chunked = _UNKNOWN # is "chunked" being used? 225. self.chunk_left = _UNKNOWN # bytes left to read in current chunk 226. self.length = _UNKNOWN # number of bytes left in response 227. self.will_close = _UNKNOWN # conn will close at end of response 228. 229. def _read_status(self): 230. # Initialize with Simple-Response defaults 231. line = self.fp.readline() 232. if self.debuglevel > 0: 233. print "reply:", repr(line) 234. if not line: 235. # Presumably, the server closed the connection before 236. # sending a valid response. 237. raise BadStatusLine(line) 238. try: 239. [version, status, reason] = line.split(None, 2) 240. 
except ValueError: 241. try: 242. [version, status] = line.split(None, 1) 243. reason = "" 244. except ValueError: 245. # empty version will cause next test to fail and status 246. # will be treated as 0.9 response. 247. version = "" 248. if not version.startswith('HTTP/'): 249. if self.strict: 250. self.close() 251. raise BadStatusLine(line) 252. else: 253. # assume it's a Simple-Response from an 0.9 server 254. self.fp = LineAndFileWrapper(line, self.fp) 255. return "HTTP/0.9", 200, "" 256. 257. # The status code is a three-digit number 258. try: 259. status = int(status) 260. if status < 100 or status > 999: 261. raise BadStatusLine(line) 262. except ValueError: 263. raise BadStatusLine(line) 264. return version, status, reason 265. 266. def begin(self): 267. if self.msg is not None: 268. # we've already started reading the response 269. return 270. 271. # read until we get a non-100 response 272. while True: 273. version, status, reason = self._read_status() 274. if status != 100: 275. break 276. # skip the header from the 100 response 277. while True: 278. skip = self.fp.readline().strip() 279. if not skip: 280. break 281. if self.debuglevel > 0: 282. print "header:", skip 283. 284. self.status = status 285. self.reason = reason.strip() 286. if version == 'HTTP/1.0': 287. self.version = 10 288. elif version.startswith('HTTP/1.'): 289. self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 290. elif version == 'HTTP/0.9': 291. self.version = 9 292. else: 293. raise UnknownProtocol(version) 294. 295. if self.version == 9: 296. self.chunked = 0 297. self.will_close = 1 298. self.msg = HTTPMessage(StringIO()) 299. return 300. 301. self.msg = HTTPMessage(self.fp, 0) 302. if self.debuglevel > 0: 303. for hdr in self.msg.headers: 304. print "header:", hdr, 305. 306. # don't let the msg keep an fp 307. self.msg.fp = None 308. 309. # are we using the chunked-style of transfer encoding? 310. tr_enc = self.msg.getheader('transfer-encoding') 311. 
if tr_enc and tr_enc.lower() == "chunked": 312. self.chunked = 1 313. self.chunk_left = None 314. else: 315. self.chunked = 0 316. 317. # will the connection close at the end of the response? 318. self.will_close = self._check_close() 319. 320. # do we have a Content-Length? 321. # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" 322. length = self.msg.getheader('content-length') 323. if length and not self.chunked: 324. try: 325. self.length = int(length) 326. except ValueError: 327. self.length = None 328. else: 329. self.length = None 330. 331. # does the body have a fixed length? (of zero) 332. if (status == 204 or # No Content 333. status == 304 or # Not Modified 334. 100 <= status < 200 or # 1xx codes 335. self._method == 'HEAD'): 336. self.length = 0 337. 338. # if the connection remains open, and we aren't using chunked, and 339. # a content-length was not provided, then assume that the connection 340. # WILL close. 341. if not self.will_close and \ 342. not self.chunked and \ 343. self.length is None: 344. self.will_close = 1 345. 346. def _check_close(self): 347. if self.version == 11: 348. # An HTTP/1.1 proxy is assumed to stay open unless 349. # explicitly closed. 350. conn = self.msg.getheader('connection') 351. if conn and conn.lower().find("close") >= 0: 352. return True 353. return False 354. 355. # An HTTP/1.0 response with a Connection header is probably 356. # the result of a confused proxy. Ignore it. 357. 358. # For older HTTP, Keep-Alive indicates persistent connection. 359. if self.msg.getheader('keep-alive'): 360. return False 361. 362. # Proxy-Connection is a netscape hack. 363. pconn = self.msg.getheader('proxy-connection') 364. if pconn and pconn.lower().find("keep-alive") >= 0: 365. return False 366. 367. # otherwise, assume it will close 368. return True 369. 370. def close(self): 371. if self.fp: 372. self.fp.close() 373. self.fp = None 374. 375. def isclosed(self): 376. 
# NOTE: it is possible that we will not ever call self.close(). This 377. # case occurs when will_close is TRUE, length is None, and we 378. # read up to the last byte, but NOT past it. 379. # 380. # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be 381. # called, meaning self.isclosed() is meaningful. 382. return self.fp is None 383. 384. def read(self, amt=None): 385. if self.fp is None: 386. return '' 387. 388. if self.chunked: 389. return self._read_chunked(amt) 390. 391. if amt is None: 392. # unbounded read 393. if self.will_close: 394. s = self.fp.read() 395. else: 396. s = self._safe_read(self.length) 397. self.close() # we read everything 398. return s 399. 400. if self.length is not None: 401. if amt > self.length: 402. # clip the read to the "end of response" 403. amt = self.length 404. self.length -= amt 405. 406. # we do not use _safe_read() here because this may be a .will_close 407. # connection, and the user is reading more bytes than will be provided 408. # (for example, reading in 1k chunks) 409. s = self.fp.read(amt) 410. 411. return s 412. 413. def _read_chunked(self, amt): 414. assert self.chunked != _UNKNOWN 415. chunk_left = self.chunk_left 416. value = '' 417. 418. # XXX This accumulates chunks by repeated string concatenation, 419. # which is not efficient as the number or size of chunks gets big. 420. while True: 421. if chunk_left is None: 422. line = self.fp.readline() 423. i = line.find(';') 424. if i >= 0: 425. line = line[:i] # strip chunk-extensions 426. chunk_left = int(line, 16) 427. if chunk_left == 0: 428. break 429. if amt is None: 430. value += self._safe_read(chunk_left) 431. elif amt < chunk_left: 432. value += self._safe_read(amt) 433. self.chunk_left = chunk_left - amt 434. return value 435. elif amt == chunk_left: 436. value += self._safe_read(amt) 437. self._safe_read(2) # toss the CRLF at the end of the chunk 438. self.chunk_left = None 439. return value 440. else: 441. 
value += self._safe_read(chunk_left) 442. amt -= chunk_left 443. 444. # we read the whole chunk, get another 445. self._safe_read(2) # toss the CRLF at the end of the chunk 446. chunk_left = None 447. 448. # read and discard trailer up to the CRLF terminator 449. ### note: we shouldn't have any trailers! 450. while True: 451. line = self.fp.readline() 452. if line == '\r\n': 453. break 454. 455. # we read everything; close the "file" 456. self.close() 457. 458. return value 459. 460. def _safe_read(self, amt): 461. """Read the number of bytes requested, compensating for partial reads. 462. 463. Normally, we have a blocking socket, but a read() can be interrupted 464. by a signal (resulting in a partial read). 465. 466. Note that we cannot distinguish between EOF and an interrupt when zero 467. bytes have been read. IncompleteRead() will be raised in this 468. situation. 469. 470. This function should be used when <amt> bytes "should" be present for 471. reading. If the bytes are truly not available (due to EOF), then the 472. IncompleteRead exception can be used to detect the problem. 473. """ 474. s = '' 475. while amt > 0: 476. chunk = self.fp.read(amt) 477. if not chunk: 478. raise IncompleteRead(s) 479. s += chunk 480. amt -= len(chunk) 481. return s 482. 483. def getheader(self, name, default=None): 484. if self.msg is None: 485. raise ResponseNotReady() 486. return self.msg.getheader(name, default) 487. 488. 489. class HTTPConnection: 490. 491. _http_vsn = 11 492. _http_vsn_str = 'HTTP/1.1' 493. 494. response_class = HTTPResponse 495. default_port = HTTP_PORT 496. auto_open = 1 497. debuglevel = 0 498. strict = 0 499. 500. def __init__(self, host, port=None, strict=None): 501. self.sock = None 502. self._buffer = [] 503. self.__response = None 504. self.__state = _CS_IDLE 505. self._method = None 506. 507. self._set_hostport(host, port) 508. if strict is not None: 509. self.strict = strict 510. 511. def _set_hostport(self, host, port): 512. 
if port is None: 513. i = host.rfind(':') 514. j = host.rfind(']') # ipv6 addresses have [...] 515. if i > j: 516. try: 517. port = int(host[i+1:]) 518. except ValueError: 519. raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) 520. host = host[:i] 521. else: 522. port = self.default_port 523. if host[0] == '[' and host[-1] == ']': 524. host = host[1:-1] 525. self.host = host 526. self.port = port 527. 528. def set_debuglevel(self, level): 529. self.debuglevel = level 530. 531. def connect(self): 532. """Connect to the host and port specified in __init__.""" 533. msg = "getaddrinfo returns an empty list" 534. for res in socket.getaddrinfo(self.host, self.port, 0, 535. socket.SOCK_STREAM): 536. af, socktype, proto, canonname, sa = res 537. try: 538. self.sock = socket.socket(af, socktype, proto) 539. if self.debuglevel > 0: 540. print "connect: (%s, %s)" % (self.host, self.port) 541. self.sock.connect(sa) 542. except socket.error, msg: 543. if self.debuglevel > 0: 544. print 'connect fail:', (self.host, self.port) 545. if self.sock: 546. self.sock.close() 547. self.sock = None 548. continue 549. break 550. if not self.sock: 551. raise socket.error, msg 552. 553. def close(self): 554. """Close the connection to the HTTP server.""" 555. if self.sock: 556. self.sock.close() # close it manually... there may be other refs 557. self.sock = None 558. if self.__response: 559. self.__response.close() 560. self.__response = None 561. self.__state = _CS_IDLE 562. 563. def send(self, str): 564. """Send `str' to the server.""" 565. if self.sock is None: 566. if self.auto_open: 567. self.connect() 568. else: 569. raise NotConnected() 570. 571. # send the data to the server. if we get a broken pipe, then close 572. # the socket. we want to reconnect when somebody tries to send again. 573. # 574. # NOTE: we DO propagate the error, though, because we cannot simply 575. # ignore the error... the caller will know if they can retry. 576. if self.debuglevel > 0: 577. 
print "send:", repr(str) 578. try: 579. self.sock.sendall(str) 580. except socket.error, v: 581. if v[0] == 32: # Broken pipe 582. self.close() 583. raise 584. 585. def _output(self, s): 586. """Add a line of output to the current request buffer. 587. 588. Assumes that the line does *not* end with \\r\\n. 589. """ 590. self._buffer.append(s) 591. 592. def _send_output(self): 593. """Send the currently buffered request and clear the buffer. 594. 595. Appends an extra \\r\\n to the buffer. 596. """ 597. self._buffer.extend(("", "")) 598. msg = "\r\n".join(self._buffer) 599. del self._buffer[:] 600. self.send(msg) 601. 602. def putrequest(self, method, url, skip_host=0): 603. """Send a request to the server. 604. 605. `method' specifies an HTTP request method, e.g. 'GET'. 606. `url' specifies the object being requested, e.g. '/index.html'. 607. """ 608. 609. # if a prior response has been completed, then forget about it. 610. if self.__response and self.__response.isclosed(): 611. self.__response = None 612. 613. # 614. # in certain cases, we cannot issue another request on this connection. 615. # this occurs when: 616. # 1) we are in the process of sending a request. (_CS_REQ_STARTED) 617. # 2) a response to a previous request has signalled that it is going 618. # to close the connection upon completion. 619. # 3) the headers for the previous response have not been read, thus 620. # we cannot determine whether point (2) is true. (_CS_REQ_SENT) 621. # 622. # if there is no prior response, then we can request at will. 623. # 624. # if point (2) is true, then we will have passed the socket to the 625. # response (effectively meaning, "there is no prior response"), and 626. # will open a new one when a new request is made. 627. # 628. # Note: if a prior response exists, then we *can* start a new request. 629. # We are not allowed to begin fetching the response to this new 630. # request, however, until that prior response is complete. 631. # 632. 
if self.__state == _CS_IDLE: 633. self.__state = _CS_REQ_STARTED 634. else: 635. raise CannotSendRequest() 636. 637. # Save the method we use, we need it later in the response phase 638. self._method = method 639. if not url: 640. url = '/' 641. str = '%s %s %s' % (method, url, self._http_vsn_str) 642. 643. self._output(str) 644. 645. if self._http_vsn == 11: 646. # Issue some standard headers for better HTTP/1.1 compliance 647. 648. if not skip_host: 649. # this header is issued *only* for HTTP/1.1 650. # connections. more specifically, this means it is 651. # only issued when the client uses the new 652. # HTTPConnection() class. backwards-compat clients 653. # will be using HTTP/1.0 and those clients may be 654. # issuing this header themselves. we should NOT issue 655. # it twice; some web servers (such as Apache) barf 656. # when they see two Host: headers 657. 658. # If we need a non-standard port, include it in the 659. # header. If the request is going through a proxy, 660. # use the host of the actual URL, not the host of the 661. # proxy. 662. 663. netloc = '' 664. if url.startswith('http'): 665. nil, netloc, nil, nil, nil = urlsplit(url) 666. 667. if netloc: 668. self.putheader('Host', netloc.encode("idna")) 669. elif self.port == HTTP_PORT: 670. self.putheader('Host', self.host.encode("idna")) 671. else: 672. self.putheader('Host', "%s:%s" % (self.host.encode("idna"), self.port)) 673. 674. # note: we are assuming that clients will not attempt to set these 675. # headers since *this* library must deal with the 676. # consequences. this also means that when the supporting 677. # libraries are updated to recognize other forms, then this 678. # code should be changed (removed or updated). 679. 680. # we only want a Content-Encoding of "identity" since we don't 681. # support encodings such as x-gzip or x-deflate. 682. self.putheader('Accept-Encoding', 'identity') 683. 684. # we can accept "chunked" Transfer-Encodings, but no others 685. 
# NOTE: no TE header implies *only* "chunked" 686. #self.putheader('TE', 'chunked') 687. 688. # if TE is supplied in the header, then it must appear in a 689. # Connection header. 690. #self.putheader('Connection', 'TE') 691. 692. else: 693. # For HTTP/1.0, the server will assume "not chunked" 694. pass 695. 696. def putheader(self, header, value): 697. """Send a request header line to the server. 698. 699. For example: h.putheader('Accept', 'text/html') 700. """ 701. if self.__state != _CS_REQ_STARTED: 702. raise CannotSendHeader() 703. 704. str = '%s: %s' % (header, value) 705. self._output(str) 706. 707. def endheaders(self): 708. """Indicate that the last header line has been sent to the server.""" 709. 710. if self.__state == _CS_REQ_STARTED: 711. self.__state = _CS_REQ_SENT 712. else: 713. raise CannotSendHeader() 714. 715. self._send_output() 716. 717. def request(self, method, url, body=None, headers={}): 718. """Send a complete request to the server.""" 719. 720. try: 721. self._send_request(method, url, body, headers) 722. except socket.error, v: 723. # trap 'Broken pipe' if we're allowed to automatically reconnect 724. if v[0] != 32 or not self.auto_open: 725. raise 726. # try one more time 727. self._send_request(method, url, body, headers) 728. 729. def _send_request(self, method, url, body, headers): 730. # If headers already contains a host header, then define the 731. # optional skip_host argument to putrequest(). The check is 732. # more delicate because field names are case insensitive. 733. if 'host' in [k.lower() for k in headers]: 734. self.putrequest(method, url, skip_host=1) 735. else: 736. self.putrequest(method, url) 737. 738. if body: 739. self.putheader('Content-Length', str(len(body))) 740. for hdr, value in headers.iteritems(): 741. self.putheader(hdr, value) 742. self.endheaders() 743. 744. if body: 745. self.send(body) 746. 747. def getresponse(self): 748. "Get the response from the server." 749. 750. 
# if a prior response has been completed, then forget about it. 751. if self.__response and self.__response.isclosed(): 752. self.__response = None 753. 754. # 755. # if a prior response exists, then it must be completed (otherwise, we 756. # cannot read this response's header to determine the connection-close 757. # behavior) 758. # 759. # note: if a prior response existed, but was connection-close, then the 760. # socket and response were made independent of this HTTPConnection 761. # object since a new request requires that we open a whole new 762. # connection 763. # 764. # this means the prior response had one of two states: 765. # 1) will_close: this connection was reset and the prior socket and 766. # response operate independently 767. # 2) persistent: the response was retained and we await its 768. # isclosed() status to become true. 769. # 770. if self.__state != _CS_REQ_SENT or self.__response: 771. raise ResponseNotReady() 772. 773. if self.debuglevel > 0: 774. response = self.response_class(self.sock, self.debuglevel, 775. strict=self.strict, 776. method=self._method) 777. else: 778. response = self.response_class(self.sock, strict=self.strict, 779. method=self._method) 780. 781. response.begin() 782. assert response.will_close != _UNKNOWN 783. self.__state = _CS_IDLE 784. 785. if response.will_close: 786. # this effectively passes the connection to the response 787. self.close() 788. else: 789. # remember this, so we can tell when it is complete 790. self.__response = response 791. 792. return response 793. 794. # The next several classes are used to define FakeSocket, a socket-like 795. # interface to an SSL connection. 796. 797. # The primary complexity comes from faking a makefile() method. The 798. # standard socket makefile() implementation calls dup() on the socket 799. # file descriptor. As a consequence, clients can call close() on the 800. # parent socket and its makefile children in any order. The underlying 801. 
# socket isn't closed until they are all closed. 802. 803. # The implementation uses reference counting to keep the socket open 804. # until the last client calls close(). SharedSocket keeps track of 805. # the reference counting and SharedSocketClient provides a constructor 806. # and close() method that call incref() and decref() correctly. 807. 808. class SharedSocket: 809. 810. def __init__(self, sock): 811. self.sock = sock 812. self._refcnt = 0 813. 814. def incref(self): 815. self._refcnt += 1 816. 817. def decref(self): 818. self._refcnt -= 1 819. assert self._refcnt >= 0 820. if self._refcnt == 0: 821. self.sock.close() 822. 823. def __del__(self): 824. self.sock.close() 825. 826. class SharedSocketClient: 827. 828. def __init__(self, shared): 829. self._closed = 0 830. self._shared = shared 831. self._shared.incref() 832. self._sock = shared.sock 833. 834. def close(self): 835. if not self._closed: 836. self._shared.decref() 837. self._closed = 1 838. self._shared = None 839. 840. class SSLFile(SharedSocketClient): 841. """File-like object wrapping an SSL socket.""" 842. 843. BUFSIZE = 8192 844. 845. def __init__(self, sock, ssl, bufsize=None): 846. SharedSocketClient.__init__(self, sock) 847. self._ssl = ssl 848. self._buf = '' 849. self._bufsize = bufsize or self.__class__.BUFSIZE 850. 851. def _read(self): 852. buf = '' 853. # put in a loop so that we retry on transient errors 854. while True: 855. try: 856. buf = self._ssl.read(self._bufsize) 857. except socket.sslerror, err: 858. if (err[0] == socket.SSL_ERROR_WANT_READ 859. or err[0] == socket.SSL_ERROR_WANT_WRITE): 860. continue 861. if (err[0] == socket.SSL_ERROR_ZERO_RETURN 862. or err[0] == socket.SSL_ERROR_EOF): 863. break 864. raise 865. except socket.error, err: 866. if err[0] == errno.EINTR: 867. continue 868. if err[0] == errno.EBADF: 869. # XXX socket was closed? 870. break 871. raise 872. else: 873. break 874. return buf 875. 876. def read(self, size=None): 877. L = [self._buf] 878. 
avail = len(self._buf) 879. while size is None or avail < size: 880. s = self._read() 881. if s == '': 882. break 883. L.append(s) 884. avail += len(s) 885. all = "".join(L) 886. if size is None: 887. self._buf = '' 888. return all 889. else: 890. self._buf = all[size:] 891. return all[:size] 892. 893. def readline(self): 894. L = [self._buf] 895. self._buf = '' 896. while 1: 897. i = L[-1].find("\n") 898. if i >= 0: 899. break 900. s = self._read() 901. if s == '': 902. break 903. L.append(s) 904. if i == -1: 905. # loop exited because there is no more data 906. return "".join(L) 907. else: 908. all = "".join(L) 909. # XXX could do enough bookkeeping not to do a 2nd search 910. i = all.find("\n") + 1 911. line = all[:i] 912. self._buf = all[i:] 913. return line 914. 915. def readlines(self, sizehint=0): 916. total = 0 917. list = [] 918. while True: 919. line = self.readline() 920. if not line: 921. break 922. list.append(line) 923. total += len(line) 924. if sizehint and total >= sizehint: 925. break 926. return list 927. 928. def fileno(self): 929. return self._sock.fileno() 930. 931. def __iter__(self): 932. return self 933. 934. def next(self): 935. line = self.readline() 936. if not line: 937. raise StopIteration 938. return line 939. 940. class FakeSocket(SharedSocketClient): 941. 942. class _closedsocket: 943. def __getattr__(self, name): 944. raise error(9, 'Bad file descriptor') 945. 946. def __init__(self, sock, ssl): 947. sock = SharedSocket(sock) 948. SharedSocketClient.__init__(self, sock) 949. self._ssl = ssl 950. 951. def close(self): 952. SharedSocketClient.close(self) 953. self._sock = self.__class__._closedsocket() 954. 955. def makefile(self, mode, bufsize=None): 956. if mode != 'r' and mode != 'rb': 957. raise UnimplementedFileMode() 958. return SSLFile(self._shared, self._ssl, bufsize) 959. 960. def send(self, stuff, flags = 0): 961. return self._ssl.write(stuff) 962. 963. sendall = send 964. 965. def recv(self, len = 1024, flags = 0): 966. 
return self._ssl.read(len) 967. 968. def __getattr__(self, attr): 969. return getattr(self._sock, attr) 970. 971. 972. class HTTPSConnection(HTTPConnection): 973. "This class allows communication via SSL." 974. 975. default_port = HTTPS_PORT 976. 977. def __init__(self, host, port=None, key_file=None, cert_file=None, 978. strict=None): 979. HTTPConnection.__init__(self, host, port, strict) 980. self.key_file = key_file 981. self.cert_file = cert_file 982. 983. def connect(self): 984. "Connect to a host on a given (SSL) port." 985. 986. sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 987. sock.connect((self.host, self.port)) 988. ssl = socket.ssl(sock, self.key_file, self.cert_file) 989. self.sock = FakeSocket(sock, ssl) 990. 991. 992. class HTTP: 993. "Compatibility class with httplib.py from 1.5." 994. 995. _http_vsn = 10 996. _http_vsn_str = 'HTTP/1.0' 997. 998. debuglevel = 0 999. 1000. _connection_class = HTTPConnection 1001. 1002. def __init__(self, host='', port=None, strict=None): 1003. "Provide a default host, since the superclass requires one." 1004. 1005. # some joker passed 0 explicitly, meaning default port 1006. if port == 0: 1007. port = None 1008. 1009. # Note that we may pass an empty string as the host; this will throw 1010. # an error when we attempt to connect. Presumably, the client code 1011. # will call connect before then, with a proper host. 1012. self._setup(self._connection_class(host, port, strict)) 1013. 1014. def _setup(self, conn): 1015. self._conn = conn 1016. 1017. # set up delegation to flesh out interface 1018. self.send = conn.send 1019. self.putrequest = conn.putrequest 1020. self.endheaders = conn.endheaders 1021. self.set_debuglevel = conn.set_debuglevel 1022. 1023. conn._http_vsn = self._http_vsn 1024. conn._http_vsn_str = self._http_vsn_str 1025. 1026. self.file = None 1027. 1028. def connect(self, host=None, port=None): 1029. "Accept arguments to set the host/port, since the superclass doesn't." 1030. 1031. 
if host is not None: 1032. self._conn._set_hostport(host, port) 1033. self._conn.connect() 1034. 1035. def getfile(self): 1036. "Provide a getfile, since the superclass' does not use this concept." 1037. return self.file 1038. 1039. def putheader(self, header, *values): 1040. "The superclass allows only one value argument." 1041. self._conn.putheader(header, '\r\n\t'.join(values)) 1042. 1043. def getreply(self): 1044. """Compat definition since superclass does not define it. 1045. 1046. Returns a tuple consisting of: 1047. - server status code (e.g. '200' if all goes well) 1048. - server "reason" corresponding to status code 1049. - any RFC822 headers in the response from the server 1050. """ 1051. try: 1052. response = self._conn.getresponse() 1053. except BadStatusLine, e: 1054. ### hmm. if getresponse() ever closes the socket on a bad request, 1055. ### then we are going to have problems with self.sock 1056. 1057. ### should we keep this behavior? do people use it? 1058. # keep the socket open (as a file), and return it 1059. self.file = self._conn.sock.makefile('rb', 0) 1060. 1061. # close our socket -- we want to restart after any protocol error 1062. self.close() 1063. 1064. self.headers = None 1065. return -1, e.line, None 1066. 1067. self.headers = response.msg 1068. self.file = response.fp 1069. return response.status, response.reason, response.msg 1070. 1071. def close(self): 1072. self._conn.close() 1073. 1074. # note that self.file == response.fp, which gets closed by the 1075. # superclass. just clear the object ref here. 1076. ### hmm. messy. if status==-1, then self.file is owned by us. 1077. ### well... we aren't explicitly closing, but losing this ref will 1078. ### do it 1079. self.file = None 1080. 1081. if hasattr(socket, 'ssl'): 1082. class HTTPS(HTTP): 1083. """Compatibility with 1.5 httplib interface 1084. 1085. Python 1.5.2 did not have an HTTPS class, but it defined an 1086. interface for sending http requests that is also useful for 1087. 
https. 1088. """ 1089. 1090. _connection_class = HTTPSConnection 1091. 1092. def __init__(self, host='', port=None, key_file=None, cert_file=None, 1093. strict=None): 1094. # provide a default host, pass the X509 cert info 1095. 1096. # urf. compensate for bad input. 1097. if port == 0: 1098. port = None 1099. self._setup(self._connection_class(host, port, key_file, 1100. cert_file, strict)) 1101. 1102. # we never actually use these for anything, but we keep them 1103. # here for compatibility with post-1.5.2 CVS. 1104. self.key_file = key_file 1105. self.cert_file = cert_file 1106. 1107. 1108. class HTTPException(Exception): 1109. # Subclasses that define an __init__ must call Exception.__init__ 1110. # or define self.args. Otherwise, str() will fail. 1111. pass 1112. 1113. class NotConnected(HTTPException): 1114. pass 1115. 1116. class InvalidURL(HTTPException): 1117. pass 1118. 1119. class UnknownProtocol(HTTPException): 1120. def __init__(self, version): 1121. self.args = version, 1122. self.version = version 1123. 1124. class UnknownTransferEncoding(HTTPException): 1125. pass 1126. 1127. class UnimplementedFileMode(HTTPException): 1128. pass 1129. 1130. class IncompleteRead(HTTPException): 1131. def __init__(self, partial): 1132. self.args = partial, 1133. self.partial = partial 1134. 1135. class ImproperConnectionState(HTTPException): 1136. pass 1137. 1138. class CannotSendRequest(ImproperConnectionState): 1139. pass 1140. 1141. class CannotSendHeader(ImproperConnectionState): 1142. pass 1143. 1144. class ResponseNotReady(ImproperConnectionState): 1145. pass 1146. 1147. class BadStatusLine(HTTPException): 1148. def __init__(self, line): 1149. self.args = line, 1150. self.line = line 1151. 1152. # for backwards compatibility 1153. error = HTTPException 1154. 1155. class LineAndFileWrapper: 1156. """A limited file-like object for HTTP/0.9 responses.""" 1157. 1158. # The status-line parsing code calls readline(), which normally 1159. # get the HTTP status line. 
For a 0.9 response, however, this is 1160. # actually the first line of the body! Clients need to get a 1161. # readable file object that contains that line. 1162. 1163. def __init__(self, line, file): 1164. self._line = line 1165. self._file = file 1166. self._line_consumed = 0 1167. self._line_offset = 0 1168. self._line_left = len(line) 1169. 1170. def __getattr__(self, attr): 1171. return getattr(self._file, attr) 1172. 1173. def _done(self): 1174. # called when the last byte is read from the line. After the 1175. # call, all read methods are delegated to the underlying file 1176. # object. 1177. self._line_consumed = 1 1178. self.read = self._file.read 1179. self.readline = self._file.readline 1180. self.readlines = self._file.readlines 1181. 1182. def read(self, amt=None): 1183. assert not self._line_consumed and self._line_left 1184. if amt is None or amt > self._line_left: 1185. s = self._line[self._line_offset:] 1186. self._done() 1187. if amt is None: 1188. return s + self._file.read() 1189. else: 1190. return s + self._file.read(amt - len(s)) 1191. else: 1192. assert amt <= self._line_left 1193. i = self._line_offset 1194. j = i + amt 1195. s = self._line[i:j] 1196. self._line_offset = j 1197. self._line_left -= amt 1198. if self._line_left == 0: 1199. self._done() 1200. return s 1201. 1202. def readline(self): 1203. s = self._line[self._line_offset:] 1204. self._done() 1205. return s 1206. 1207. def readlines(self, size=None): 1208. L = [self._line[self._line_offset:]] 1209. self._done() 1210. if size is None: 1211. return L + self._file.readlines() 1212. else: 1213. return L + self._file.readlines(size) 1214. 1215. def test(): 1216. """Test this module. 1217. 1218. A hodge podge of tests collected here, because they have too many 1219. external dependencies for the regular test suite. 1220. """ 1221. 1222. import sys 1223. import getopt 1224. opts, args = getopt.getopt(sys.argv[1:], 'd') 1225. dl = 0 1226. for o, a in opts: 1227. 
if o == '-d': dl = dl + 1 1228. host = 'www.python.org' 1229. selector = '/' 1230. if args[0:]: host = args[0] 1231. if args[1:]: selector = args[1] 1232. h = HTTP() 1233. h.set_debuglevel(dl) 1234. h.connect(host) 1235. h.putrequest('GET', selector) 1236. h.endheaders() 1237. status, reason, headers = h.getreply() 1238. print 'status =', status 1239. print 'reason =', reason 1240. print "read", len(h.getfile().read()) 1241. print 1242. if headers: 1243. for header in headers.headers: print header.strip() 1244. print 1245. 1246. # minimal test that code to extract host from url works 1247. class HTTP11(HTTP): 1248. _http_vsn = 11 1249. _http_vsn_str = 'HTTP/1.1' 1250. 1251. h = HTTP11('www.python.org') 1252. h.putrequest('GET', 'http://www.python.org/~jeremy/') 1253. h.endheaders() 1254. h.getreply() 1255. h.close() 1256. 1257. if hasattr(socket, 'ssl'): 1258. 1259. for host, selector in (('sourceforge.net', '/projects/python'), 1260. ): 1261. print "https://%s%s" % (host, selector) 1262. hs = HTTPS() 1263. hs.set_debuglevel(dl) 1264. hs.connect(host) 1265. hs.putrequest('GET', selector) 1266. hs.endheaders() 1267. status, reason, headers = hs.getreply() 1268. print 'status =', status 1269. print 'reason =', reason 1270. print "read", len(hs.getfile().read()) 1271. print 1272. if headers: 1273. for header in headers.headers: print header.strip() 1274. print 1275. 1276. if __name__ == '__main__': 1277. test()