source file: /System/Library/Frameworks/Python.framework/Versions/2.3/lib/python2.3/mimetypes.py
file stats: 160 lines, 54 executed: 33.8% covered
"""Guess the MIME type of a file.

This module defines two useful functions:

guess_type(url, strict=1) -- guess the MIME type and encoding of a URL.

guess_extension(type, strict=1) -- guess the extension for a given MIME type.

It also contains the following, for tuning the behavior:

Data:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types

Functions:

init([files]) -- parse a list of files, default knownfiles
read_mime_types(file) -- parse one file, return a dictionary or None
"""

import os
import posixpath
import urllib

__all__ = [
    "guess_type", "guess_extension", "guess_all_extensions",
    "add_type", "read_mime_types", "init",
]

knownfiles = [
    "/etc/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
]

# Set to True by init(); MimeTypes.__init__() checks it to decide whether the
# module-level tables still have to be initialised.
inited = False

# The MimeTypes instance backing the module-level convenience functions.
# It is created by init() and stays None until then.
_db = None


class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.
    """

    def __init__(self, filenames=(), strict=True):
        """Build a datastore seeded from the module-level tables.

        `filenames' is a sequence of mime.types-style files that are read
        in order on top of the defaults; `strict' says whether the types
        read from those files are registered as standard (true) or
        non-standard (false) types.
        """
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        # Both tuples are indexed by the `strict' flag:
        # index False (0) holds non-standard types, index True (1) standard.
        self.types_map = ({}, {})       # ext -> type
        self.types_map_inv = ({}, {})   # type -> [ext, ...]
        for (ext, mime_type) in types_map.items():
            self.add_type(mime_type, ext, True)
        for (ext, mime_type) in common_types.items():
            self.add_type(mime_type, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        self.types_map[strict][ext] = type
        exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in exts:
            exts.append(ext)

    def guess_type(self, url, strict=True):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        Optional `strict' argument when False adds a bunch of commonly found,
        but non-standard types.
        """
        scheme, url = urllib.splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            if semi >= 0:
                mime_type = url[:semi]
            else:
                mime_type = url[:comma]
            if '=' in mime_type or '/' not in mime_type:
                mime_type = 'text/plain'
            return mime_type, None      # never compressed, so encoding is None

        base, ext = posixpath.splitext(url)
        # Expand shorthand suffixes (e.g. '.tgz' -> '.tar.gz') first.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        # Peel off an encoding suffix so the type is guessed from what is
        # underneath it.
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None

        lowered = ext.lower()
        standard = self.types_map[True]
        if ext in standard:
            return standard[ext], encoding
        if lowered in standard:
            return standard[lowered], encoding
        if strict:
            return None, encoding
        common = self.types_map[False]
        if ext in common:
            return common[ext], encoding
        if lowered in common:
            return common[lowered], encoding
        return None, encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type `type' by guess_type().
        The list is empty when no extension is known for `type'.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        # Work on a copy: appending the non-standard extensions to the stored
        # list would leak them into every later strict lookup, and handing
        # out the stored list would let callers modify the table.
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        if not extensions:
            return None
        return extensions[0]

    def read(self, filename, strict=True):
        """Read a single mime.types-format file, specified by pathname.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.

        Whatever open() raises for an unreadable file is propagated.
        """
        fp = open(filename)
        try:
            self.readfp(fp, strict)
        finally:
            # Close the file even when parsing fails.
            fp.close()

    def readfp(self, fp, strict=True):
        """Read a single mime.types-format file.

        `fp' is an open file-like object; only its readline() method is
        used.  Each non-empty line has the form "type ext1 ext2 ...", and
        a word starting with '#' begins a comment that runs to the end of
        the line.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while True:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            # Drop a trailing comment: everything from the first word that
            # starts with '#'.
            for i in range(len(words)):
                if words[i][0] == '#':
                    del words[i:]
                    break
            if not words:
                continue
            mime_type, suffixes = words[0], words[1:]
            for suff in suffixes:
                self.add_type(mime_type, '.' + suff, strict)


def guess_type(url, strict=True):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    Optional `strict' argument when false adds a bunch of commonly found, but
    non-standard types.
    """
    if _db is None:
        init()
    return _db.guess_type(url, strict)


def guess_all_extensions(type, strict=True):
    """Guess the extensions for a file based on its MIME type.

    Return value is a list of strings giving the possible filename
    extensions, including the leading dot ('.').  The extension is not
    guaranteed to have been associated with any particular data
    stream, but would be mapped to the MIME type `type' by
    guess_type().  If no extension can be guessed for `type', the list
    is empty.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    if _db is None:
        init()
    return _db.guess_all_extensions(type, strict)


def guess_extension(type, strict=True):
    """Guess the extension for a file based on its MIME type.

    Return value is a string giving a filename extension, including the
    leading dot ('.').  The extension is not guaranteed to have been
    associated with any particular data stream, but would be mapped to the
    MIME type `type' by guess_type().  If no extension can be guessed for
    `type', None is returned.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    if _db is None:
        init()
    return _db.guess_extension(type, strict)


def add_type(type, ext, strict=True):
    """Add a mapping between a type and an extension.

    When the extension is already known, the new
    type will replace the old one. When the type
    is already known the extension will be added
    to the list of known extensions.

    If strict is true, information will be added to
    list of standard types, else to the list of non-standard
    types.
    """
    if _db is None:
        init()
    return _db.add_type(type, ext, strict)


def init(files=None):
    """(Re)build the database behind the module-level functions.

    `files' is a sequence of mime.types-style pathnames; it defaults to
    knownfiles.  Entries that are not existing regular files are skipped.
    Afterwards the module-level suffix_map, encodings_map, types_map and
    common_types refer to the tables of the freshly built database.
    """
    global suffix_map, types_map, encodings_map, common_types
    global inited, _db
    inited = True   # set first so MimeTypes.__init__() does not call us again
    db = MimeTypes()
    if files is None:
        files = knownfiles
    for path in files:
        if os.path.isfile(path):
            # read() closes the file when it is done with it.
            db.read(path)
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map[True]
    common_types = db.types_map[False]
    # Publish the database only once it is fully populated.
    _db = db


def read_mime_types(file):
    """Parse one mime.types-format file.

    Return the resulting dictionary mapping extensions (with leading dot)
    to standard types, which also contains the module defaults, or None
    when `file' cannot be opened.
    """
    try:
        f = open(file)
    except IOError:
        return None
    try:
        db = MimeTypes()
        db.readfp(f, True)
    finally:
        f.close()
    return db.types_map[True]


suffix_map = {
    '.tgz': '.tar.gz',
    '.taz': '.tar.gz',
    '.tz': '.tar.gz',
}

encodings_map = {
    '.gz': 'gzip',
    '.Z': 'compress',
}

# Before adding new types, make sure they are either registered with IANA, at
# http://www.isi.edu/in-notes/iana/assignments/media-types
# or extensions, i.e. using the x- prefix

# If you add to these, please keep them sorted!
types_map = {
    '.a': 'application/octet-stream',
    '.ai': 'application/postscript',
    '.aif': 'audio/x-aiff',
    '.aifc': 'audio/x-aiff',
    '.aiff': 'audio/x-aiff',
    '.au': 'audio/basic',
    '.avi': 'video/x-msvideo',
    '.bat': 'text/plain',
    '.bcpio': 'application/x-bcpio',
    '.bin': 'application/octet-stream',
    '.bmp': 'image/x-ms-bmp',
    '.c': 'text/plain',
    # '.cdf' used to be listed twice; a dict literal keeps only the last
    # value of a repeated key, so 'application/x-cdf' never took effect.
    '.cdf': 'application/x-netcdf',
    '.cpio': 'application/x-cpio',
    '.csh': 'application/x-csh',
    '.css': 'text/css',
    '.dll': 'application/octet-stream',
    '.doc': 'application/msword',
    '.dot': 'application/msword',
    '.dvi': 'application/x-dvi',
    '.eml': 'message/rfc822',
    '.eps': 'application/postscript',
    '.etx': 'text/x-setext',
    '.exe': 'application/octet-stream',
    '.gif': 'image/gif',
    '.gtar': 'application/x-gtar',
    '.h': 'text/plain',
    '.hdf': 'application/x-hdf',
    '.htm': 'text/html',
    '.html': 'text/html',
    '.ief': 'image/ief',
    '.jpe': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.jpg': 'image/jpeg',
    '.js': 'application/x-javascript',
    '.ksh': 'text/plain',
    '.latex': 'application/x-latex',
    '.m1v': 'video/mpeg',
    '.man': 'application/x-troff-man',
    '.me': 'application/x-troff-me',
    '.mht': 'message/rfc822',
    '.mhtml': 'message/rfc822',
    '.mif': 'application/x-mif',
    '.mov': 'video/quicktime',
    '.movie': 'video/x-sgi-movie',
    '.mp2': 'audio/mpeg',
    '.mp3': 'audio/mpeg',
    '.mpa': 'video/mpeg',
    '.mpe': 'video/mpeg',
    '.mpeg': 'video/mpeg',
    '.mpg': 'video/mpeg',
    '.ms': 'application/x-troff-ms',
    '.nc': 'application/x-netcdf',
    '.nws': 'message/rfc822',
    '.o': 'application/octet-stream',
    '.obj': 'application/octet-stream',
    '.oda': 'application/oda',
    '.p12': 'application/x-pkcs12',
    '.p7c': 'application/pkcs7-mime',
    '.pbm': 'image/x-portable-bitmap',
    '.pdf': 'application/pdf',
    '.pfx': 'application/x-pkcs12',
    '.pgm': 'image/x-portable-graymap',
    '.pl': 'text/plain',
    '.png': 'image/png',
    '.pnm': 'image/x-portable-anymap',
    '.pot': 'application/vnd.ms-powerpoint',
    '.ppa': 'application/vnd.ms-powerpoint',
    '.ppm': 'image/x-portable-pixmap',
    '.pps': 'application/vnd.ms-powerpoint',
    '.ppt': 'application/vnd.ms-powerpoint',
    '.ps': 'application/postscript',
    '.pwz': 'application/vnd.ms-powerpoint',
    '.py': 'text/x-python',
    '.pyc': 'application/x-python-code',
    '.pyo': 'application/x-python-code',
    '.qt': 'video/quicktime',
    '.ra': 'audio/x-pn-realaudio',
    '.ram': 'application/x-pn-realaudio',
    '.ras': 'image/x-cmu-raster',
    '.rdf': 'application/xml',
    '.rgb': 'image/x-rgb',
    '.roff': 'application/x-troff',
    '.rtx': 'text/richtext',
    '.sgm': 'text/x-sgml',
    '.sgml': 'text/x-sgml',
    '.sh': 'application/x-sh',
    '.shar': 'application/x-shar',
    '.snd': 'audio/basic',
    '.so': 'application/octet-stream',
    '.src': 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc': 'application/x-sv4crc',
    '.swf': 'application/x-shockwave-flash',
    '.t': 'application/x-troff',
    '.tar': 'application/x-tar',
    '.tcl': 'application/x-tcl',
    '.tex': 'application/x-tex',
    '.texi': 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif': 'image/tiff',
    '.tiff': 'image/tiff',
    '.tr': 'application/x-troff',
    '.tsv': 'text/tab-separated-values',
    '.txt': 'text/plain',
    '.ustar': 'application/x-ustar',
    '.vcf': 'text/x-vcard',
    '.wav': 'audio/x-wav',
    '.wiz': 'application/msword',
    '.xbm': 'image/x-xbitmap',
    '.xlb': 'application/vnd.ms-excel',
    # '.xls' used to be listed twice as well; 'application/excel' was
    # overridden by the entry below and never took effect.
    '.xls': 'application/vnd.ms-excel',
    '.xml': 'text/xml',
    '.xpm': 'image/x-xpixmap',
    '.xsl': 'application/xml',
    '.xwd': 'image/x-xwindowdump',
    '.zip': 'application/zip',
}

# These are non-standard types, commonly found in the wild.  They will only
# match if strict=0 flag is given to the API methods.

# Please sort these too
common_types = {
    '.jpg': 'image/jpg',
    '.mid': 'audio/midi',
    '.midi': 'audio/midi',
    '.pct': 'image/pict',
    '.pic': 'image/pict',
    '.pict': 'image/pict',
    '.rtf': 'application/rtf',
    '.xul': 'text/xul',
}
if __name__ == '__main__':
    # Command-line driver: guess a type (or, with -e, an extension) for
    # every positional argument and print the result.
    import sys
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search of some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        # Print the help text, then the optional message, and exit with
        # status `code'.
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        options, type_args = getopt.getopt(
            sys.argv[1:], 'hle', ['help', 'lenient', 'extension'])
    except getopt.error:
        # usage() exits, so the names bound in the try body are never
        # used unbound below.
        usage(1, sys.exc_info()[1])

    strict = 1
    want_extension = 0
    for flag, _value in options:
        if flag in ('-h', '--help'):
            usage(0)
        elif flag in ('-l', '--lenient'):
            strict = 0
        elif flag in ('-e', '--extension'):
            want_extension = 1

    for gtype in type_args:
        if want_extension:
            guess = guess_extension(gtype, strict)
            if guess:
                print(guess)
            else:
                print(" ".join(["I don't know anything about type", gtype]))
        else:
            guess, encoding = guess_type(gtype, strict)
            if guess:
                print(" ".join(['type:', guess, 'encoding:', str(encoding)]))
            else:
                print(" ".join(["I don't know anything about type", gtype]))