source file: /System/Library/Frameworks/Python.framework/Versions/2.3/lib/python2.3/email/_parseaddr.py
file stats: 302 lines, 37 executed: 12.3% covered
# Copyright (C) 2002 Python Software Foundation

"""Email address parsing code.

Lifted directly from rfc822.py.  This should eventually be rewritten.
"""

import calendar
import time

try:
    from types import TupleType
except ImportError:
    # Interpreters without types.TupleType: keep the module-level name
    # available as an alias of the builtin tuple type.
    TupleType = tuple

SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '

# Parse a date field
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-tuple.

    The result is (year, month, day, hour, minute, second, 0, 0, 0,
    tzoffset).  The three zero fields are placeholders (they are never
    computed here) and tzoffset is the zone's offset from UTC in seconds,
    or None when the string carries no recognizable zone.

    Zone names are looked up in _timezones; of the military zones only
    `Z' is recognized.  Any other zone must be numeric (e.g. -0500).

    Returns None when the string cannot be parsed, including when it is
    empty or contains only whitespace.
    """
    data = data.split()
    if not data:
        # Nothing to parse; without this guard data[0] raises IndexError.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3:  # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time ("13:32:02+0100" or
        # "13:32:02-0500"); split it off, keeping the sign with the zone.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('')  # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Accept "Month Day" ordering as well as "Day Month".
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names occupy the second half of the table.
        mm -= 12
    # endswith()/slicing rather than [-1]/[0] indexing: fields can be empty
    # (e.g. the RFC 850 split of "-Jan-99"), and indexing would raise.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # The year and time fields are swapped (ctime-style ordering).
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy[:1].isdigit():
        # The year and zone fields are swapped.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone name: leave tzoffset as None.
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # Floor division keeps the result an integer on every interpreter.
        tzoffset = tzsign * ((tzoffset // 100) * 3600 + (tzoffset % 100) * 60)
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)


def parsedate(data):
    """Convert a time string to a 9-tuple.

    Same as parsedate_tz() with the trailing zone offset dropped.  Returns
    None when parsedate_tz() does.
    """
    t = parsedate_tz(data)
    if isinstance(t, TupleType):
        return t[:9]
    else:
        return t


def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    When data[9] (the zone offset in seconds) is None the fields are
    interpreted as local time; otherwise they are interpreted in the given
    zone.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    else:
        # timegm() reads the fields as UTC, so the result does not depend on
        # the local zone or on whether DST is in effect locally (which
        # "mktime(...) - time.timezone" got wrong).
        t = calendar.timegm(data)
        return t - data[9]


def quote(str):
    """Escape backslashes and double quotes in a string.

    Each backslash becomes two backslashes and each double quote is
    preceded by a backslash.  No surrounding quotes are added.
    """
    return str.replace('\\', '\\\\').replace('"', '\\"')


class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser keeps a cursor (self.pos) into the header text (self.field);
    every get*() method consumes text starting at the cursor and leaves the
    cursor just past what it consumed.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; any comments met on the way are
        appended to self.commentlist.
        """
        skippable = self.LWS + '\n\r'
        while self.pos < len(self.field):
            if self.field[self.pos] in skippable:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list of (realname, address) pairs.  A stretch of the field
        that yields no address contributes an ('', '') placeholder.
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, address) pairs: empty when nothing
        usable was found, one pair for a single mailbox, and one pair per
        member for a group.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Snapshot (copy) the comments seen so far.  The look-ahead below
        # appends to self.commentlist in place, so saving a reference would
        # make the later restore a no-op and record comments twice.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(SPACE.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(SPACE.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(SPACE.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special so the caller's loop advances.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None when the cursor is not on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # Domain of a source route; parsed only to be discarded.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character that ended the addr-spec
                # (normally the closing `>').
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the local part alone when no `@' follows it, otherwise
        local-part@domain.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return EMPTYSTRING.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return EMPTYSTRING.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address.

        Whitespace inside the domain is dropped and comments are moved to
        self.commentlist.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return EMPTYSTRING.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        Returns the fragment's text without the delimiters; a backslash makes
        the following character literal and is itself dropped.
        """
        # The bounds check keeps a call at end-of-field from raising.
        if self.pos >= len(self.field) or self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = False
        self.pos += 1
        while self.pos < len(self.field):
            if quote:
                slist.append(self.field[self.pos])
                quote = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                # getcomment() already moved pos past the nested comment;
                # advancing again would swallow the next character.
                continue
            elif self.field[self.pos] == '\\':
                quote = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments met on the way are appended to
        self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist

class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (realname, address) pairs are held in self.addresslist.
    """
    def __init__(self, field):
        """Parse `field'; a false value (None or '') gives an empty list."""
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if not x in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if not x in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if not x in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place
        for x in other.addresslist:
            if x in self.addresslist:
                self.addresslist.remove(x)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]