You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

650 lines
23 KiB

15 years ago
  1. """PEP 376 implementation."""
  2. import os
  3. import re
  4. import csv
  5. import sys
  6. import zipimport
  7. from io import StringIO
  8. from hashlib import md5
  9. from packaging import logger
  10. from packaging.errors import PackagingError
  11. from packaging.version import suggest_normalized_version, VersionPredicate
  12. from packaging.metadata import Metadata
  13. __all__ = [
  14. 'Distribution', 'EggInfoDistribution', 'distinfo_dirname',
  15. 'get_distributions', 'get_distribution', 'get_file_users',
  16. 'provides_distribution', 'obsoletes_distribution',
  17. 'enable_cache', 'disable_cache', 'clear_cache',
  18. 'get_file_path', 'get_file']
  19. # TODO update docs
  20. DIST_FILES = ('INSTALLER', 'METADATA', 'RECORD', 'REQUESTED', 'RESOURCES')
  21. # Cache
  22. _cache_name = {} # maps names to Distribution instances
  23. _cache_name_egg = {} # maps names to EggInfoDistribution instances
  24. _cache_path = {} # maps paths to Distribution instances
  25. _cache_path_egg = {} # maps paths to EggInfoDistribution instances
  26. _cache_generated = False # indicates if .dist-info distributions are cached
  27. _cache_generated_egg = False # indicates if .dist-info and .egg are cached
  28. _cache_enabled = True
  29. def enable_cache():
  30. """
  31. Enables the internal cache.
  32. Note that this function will not clear the cache in any case, for that
  33. functionality see :func:`clear_cache`.
  34. """
  35. global _cache_enabled
  36. _cache_enabled = True
  37. def disable_cache():
  38. """
  39. Disables the internal cache.
  40. Note that this function will not clear the cache in any case, for that
  41. functionality see :func:`clear_cache`.
  42. """
  43. global _cache_enabled
  44. _cache_enabled = False
  45. def clear_cache():
  46. """ Clears the internal cache. """
  47. global _cache_generated, _cache_generated_egg
  48. _cache_name.clear()
  49. _cache_name_egg.clear()
  50. _cache_path.clear()
  51. _cache_path_egg.clear()
  52. _cache_generated = False
  53. _cache_generated_egg = False
  54. def _yield_distributions(include_dist, include_egg, paths):
  55. """
  56. Yield .dist-info and .egg(-info) distributions, based on the arguments
  57. :parameter include_dist: yield .dist-info distributions
  58. :parameter include_egg: yield .egg(-info) distributions
  59. """
  60. for path in paths:
  61. realpath = os.path.realpath(path)
  62. if not os.path.isdir(realpath):
  63. continue
  64. for dir in os.listdir(realpath):
  65. dist_path = os.path.join(realpath, dir)
  66. if include_dist and dir.endswith('.dist-info'):
  67. yield Distribution(dist_path)
  68. elif include_egg and (dir.endswith('.egg-info') or
  69. dir.endswith('.egg')):
  70. yield EggInfoDistribution(dist_path)
  71. def _generate_cache(use_egg_info, paths):
  72. global _cache_generated, _cache_generated_egg
  73. if _cache_generated_egg or (_cache_generated and not use_egg_info):
  74. return
  75. else:
  76. gen_dist = not _cache_generated
  77. gen_egg = use_egg_info
  78. for dist in _yield_distributions(gen_dist, gen_egg, paths):
  79. if isinstance(dist, Distribution):
  80. _cache_path[dist.path] = dist
  81. if dist.name not in _cache_name:
  82. _cache_name[dist.name] = []
  83. _cache_name[dist.name].append(dist)
  84. else:
  85. _cache_path_egg[dist.path] = dist
  86. if dist.name not in _cache_name_egg:
  87. _cache_name_egg[dist.name] = []
  88. _cache_name_egg[dist.name].append(dist)
  89. if gen_dist:
  90. _cache_generated = True
  91. if gen_egg:
  92. _cache_generated_egg = True
  93. class Distribution:
  94. """Created with the *path* of the ``.dist-info`` directory provided to the
  95. constructor. It reads the metadata contained in ``METADATA`` when it is
  96. instantiated."""
  97. name = ''
  98. """The name of the distribution."""
  99. version = ''
  100. """The version of the distribution."""
  101. metadata = None
  102. """A :class:`packaging.metadata.Metadata` instance loaded with
  103. the distribution's ``METADATA`` file."""
  104. requested = False
  105. """A boolean that indicates whether the ``REQUESTED`` metadata file is
  106. present (in other words, whether the package was installed by user
  107. request or it was installed as a dependency)."""
  108. def __init__(self, path):
  109. if _cache_enabled and path in _cache_path:
  110. self.metadata = _cache_path[path].metadata
  111. else:
  112. metadata_path = os.path.join(path, 'METADATA')
  113. self.metadata = Metadata(path=metadata_path)
  114. self.name = self.metadata['Name']
  115. self.version = self.metadata['Version']
  116. self.path = path
  117. if _cache_enabled and path not in _cache_path:
  118. _cache_path[path] = self
  119. def __repr__(self):
  120. return '<Distribution %r %s at %r>' % (
  121. self.name, self.version, self.path)
  122. def _get_records(self, local=False):
  123. results = []
  124. with self.get_distinfo_file('RECORD') as record:
  125. record_reader = csv.reader(record, delimiter=',',
  126. lineterminator='\n')
  127. for row in record_reader:
  128. missing = [None for i in range(len(row), 3)]
  129. path, checksum, size = row + missing
  130. if local:
  131. path = path.replace('/', os.sep)
  132. path = os.path.join(sys.prefix, path)
  133. results.append((path, checksum, size))
  134. return results
  135. def get_resource_path(self, relative_path):
  136. with self.get_distinfo_file('RESOURCES') as resources_file:
  137. resources_reader = csv.reader(resources_file, delimiter=',',
  138. lineterminator='\n')
  139. for relative, destination in resources_reader:
  140. if relative == relative_path:
  141. return destination
  142. raise KeyError(
  143. 'no resource file with relative path %r is installed' %
  144. relative_path)
  145. def list_installed_files(self, local=False):
  146. """
  147. Iterates over the ``RECORD`` entries and returns a tuple
  148. ``(path, md5, size)`` for each line. If *local* is ``True``,
  149. the returned path is transformed into a local absolute path.
  150. Otherwise the raw value from RECORD is returned.
  151. A local absolute path is an absolute path in which occurrences of
  152. ``'/'`` have been replaced by the system separator given by ``os.sep``.
  153. :parameter local: flag to say if the path should be returned as a local
  154. absolute path
  155. :type local: boolean
  156. :returns: iterator of (path, md5, size)
  157. """
  158. for result in self._get_records(local):
  159. yield result
  160. def uses(self, path):
  161. """
  162. Returns ``True`` if path is listed in ``RECORD``. *path* can be a local
  163. absolute path or a relative ``'/'``-separated path.
  164. :rtype: boolean
  165. """
  166. for p, checksum, size in self._get_records():
  167. local_absolute = os.path.join(sys.prefix, p)
  168. if path == p or path == local_absolute:
  169. return True
  170. return False
  171. def get_distinfo_file(self, path, binary=False):
  172. """
  173. Returns a file located under the ``.dist-info`` directory. Returns a
  174. ``file`` instance for the file pointed by *path*.
  175. :parameter path: a ``'/'``-separated path relative to the
  176. ``.dist-info`` directory or an absolute path;
  177. If *path* is an absolute path and doesn't start
  178. with the ``.dist-info`` directory path,
  179. a :class:`PackagingError` is raised
  180. :type path: string
  181. :parameter binary: If *binary* is ``True``, opens the file in read-only
  182. binary mode (``rb``), otherwise opens it in
  183. read-only mode (``r``).
  184. :rtype: file object
  185. """
  186. open_flags = 'r'
  187. if binary:
  188. open_flags += 'b'
  189. # Check if it is an absolute path # XXX use relpath, add tests
  190. if path.find(os.sep) >= 0:
  191. # it's an absolute path?
  192. distinfo_dirname, path = path.split(os.sep)[-2:]
  193. if distinfo_dirname != self.path.split(os.sep)[-1]:
  194. raise PackagingError(
  195. 'dist-info file %r does not belong to the %r %s '
  196. 'distribution' % (path, self.name, self.version))
  197. # The file must be relative
  198. if path not in DIST_FILES:
  199. raise PackagingError('invalid path for a dist-info file: %r' %
  200. path)
  201. path = os.path.join(self.path, path)
  202. return open(path, open_flags)
  203. def list_distinfo_files(self, local=False):
  204. """
  205. Iterates over the ``RECORD`` entries and returns paths for each line if
  206. the path is pointing to a file located in the ``.dist-info`` directory
  207. or one of its subdirectories.
  208. :parameter local: If *local* is ``True``, each returned path is
  209. transformed into a local absolute path. Otherwise the
  210. raw value from ``RECORD`` is returned.
  211. :type local: boolean
  212. :returns: iterator of paths
  213. """
  214. for path, checksum, size in self._get_records(local):
  215. # XXX add separator or use real relpath algo
  216. if path.startswith(self.path):
  217. yield path
  218. def __eq__(self, other):
  219. return isinstance(other, Distribution) and self.path == other.path
  220. # See http://docs.python.org/reference/datamodel#object.__hash__
  221. __hash__ = object.__hash__
  222. class EggInfoDistribution:
  223. """Created with the *path* of the ``.egg-info`` directory or file provided
  224. to the constructor. It reads the metadata contained in the file itself, or
  225. if the given path happens to be a directory, the metadata is read from the
  226. file ``PKG-INFO`` under that directory."""
  227. name = ''
  228. """The name of the distribution."""
  229. version = ''
  230. """The version of the distribution."""
  231. metadata = None
  232. """A :class:`packaging.metadata.Metadata` instance loaded with
  233. the distribution's ``METADATA`` file."""
  234. _REQUIREMENT = re.compile(
  235. r'(?P<name>[-A-Za-z0-9_.]+)\s*'
  236. r'(?P<first>(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)?\s*'
  237. r'(?P<rest>(?:\s*,\s*(?:<|<=|!=|==|>=|>)[-A-Za-z0-9_.]+)*)\s*'
  238. r'(?P<extras>\[.*\])?')
  239. def __init__(self, path):
  240. self.path = path
  241. if _cache_enabled and path in _cache_path_egg:
  242. self.metadata = _cache_path_egg[path].metadata
  243. self.name = self.metadata['Name']
  244. self.version = self.metadata['Version']
  245. return
  246. # reused from Distribute's pkg_resources
  247. def yield_lines(strs):
  248. """Yield non-empty/non-comment lines of a ``basestring``
  249. or sequence"""
  250. if isinstance(strs, str):
  251. for s in strs.splitlines():
  252. s = s.strip()
  253. # skip blank lines/comments
  254. if s and not s.startswith('#'):
  255. yield s
  256. else:
  257. for ss in strs:
  258. for s in yield_lines(ss):
  259. yield s
  260. requires = None
  261. if path.endswith('.egg'):
  262. if os.path.isdir(path):
  263. meta_path = os.path.join(path, 'EGG-INFO', 'PKG-INFO')
  264. self.metadata = Metadata(path=meta_path)
  265. try:
  266. req_path = os.path.join(path, 'EGG-INFO', 'requires.txt')
  267. with open(req_path, 'r') as fp:
  268. requires = fp.read()
  269. except IOError:
  270. requires = None
  271. else:
  272. # FIXME handle the case where zipfile is not available
  273. zipf = zipimport.zipimporter(path)
  274. fileobj = StringIO(
  275. zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8'))
  276. self.metadata = Metadata(fileobj=fileobj)
  277. try:
  278. requires = zipf.get_data('EGG-INFO/requires.txt')
  279. except IOError:
  280. requires = None
  281. self.name = self.metadata['Name']
  282. self.version = self.metadata['Version']
  283. elif path.endswith('.egg-info'):
  284. if os.path.isdir(path):
  285. path = os.path.join(path, 'PKG-INFO')
  286. try:
  287. with open(os.path.join(path, 'requires.txt'), 'r') as fp:
  288. requires = fp.read()
  289. except IOError:
  290. requires = None
  291. self.metadata = Metadata(path=path)
  292. self.name = self.metadata['Name']
  293. self.version = self.metadata['Version']
  294. else:
  295. raise ValueError('path must end with .egg-info or .egg, got %r' %
  296. path)
  297. if requires is not None:
  298. if self.metadata['Metadata-Version'] == '1.1':
  299. # we can't have 1.1 metadata *and* Setuptools requires
  300. for field in ('Obsoletes', 'Requires', 'Provides'):
  301. del self.metadata[field]
  302. reqs = []
  303. if requires is not None:
  304. for line in yield_lines(requires):
  305. if line.startswith('['):
  306. logger.warning(
  307. 'extensions in requires.txt are not supported '
  308. '(used by %r %s)', self.name, self.version)
  309. break
  310. else:
  311. match = self._REQUIREMENT.match(line.strip())
  312. if not match:
  313. # this happens when we encounter extras; since they
  314. # are written at the end of the file we just exit
  315. break
  316. else:
  317. if match.group('extras'):
  318. msg = ('extra requirements are not supported '
  319. '(used by %r %s)', self.name, self.version)
  320. logger.warning(msg, self.name)
  321. name = match.group('name')
  322. version = None
  323. if match.group('first'):
  324. version = match.group('first')
  325. if match.group('rest'):
  326. version += match.group('rest')
  327. version = version.replace(' ', '') # trim spaces
  328. if version is None:
  329. reqs.append(name)
  330. else:
  331. reqs.append('%s (%s)' % (name, version))
  332. if len(reqs) > 0:
  333. self.metadata['Requires-Dist'] += reqs
  334. if _cache_enabled:
  335. _cache_path_egg[self.path] = self
  336. def __repr__(self):
  337. return '<EggInfoDistribution %r %s at %r>' % (
  338. self.name, self.version, self.path)
  339. def list_installed_files(self, local=False):
  340. def _md5(path):
  341. with open(path, 'rb') as f:
  342. content = f.read()
  343. return md5(content).hexdigest()
  344. def _size(path):
  345. return os.stat(path).st_size
  346. path = self.path
  347. if local:
  348. path = path.replace('/', os.sep)
  349. # XXX What about scripts and data files ?
  350. if os.path.isfile(path):
  351. return [(path, _md5(path), _size(path))]
  352. else:
  353. files = []
  354. for root, dir, files_ in os.walk(path):
  355. for item in files_:
  356. item = os.path.join(root, item)
  357. files.append((item, _md5(item), _size(item)))
  358. return files
  359. return []
  360. def uses(self, path):
  361. return False
  362. def __eq__(self, other):
  363. return (isinstance(other, EggInfoDistribution) and
  364. self.path == other.path)
  365. # See http://docs.python.org/reference/datamodel#object.__hash__
  366. __hash__ = object.__hash__
  367. def distinfo_dirname(name, version):
  368. """
  369. The *name* and *version* parameters are converted into their
  370. filename-escaped form, i.e. any ``'-'`` characters are replaced
  371. with ``'_'`` other than the one in ``'dist-info'`` and the one
  372. separating the name from the version number.
  373. :parameter name: is converted to a standard distribution name by replacing
  374. any runs of non- alphanumeric characters with a single
  375. ``'-'``.
  376. :type name: string
  377. :parameter version: is converted to a standard version string. Spaces
  378. become dots, and all other non-alphanumeric characters
  379. (except dots) become dashes, with runs of multiple
  380. dashes condensed to a single dash.
  381. :type version: string
  382. :returns: directory name
  383. :rtype: string"""
  384. file_extension = '.dist-info'
  385. name = name.replace('-', '_')
  386. normalized_version = suggest_normalized_version(version)
  387. # Because this is a lookup procedure, something will be returned even if
  388. # it is a version that cannot be normalized
  389. if normalized_version is None:
  390. # Unable to achieve normality?
  391. normalized_version = version
  392. return '-'.join([name, normalized_version]) + file_extension
  393. def get_distributions(use_egg_info=False, paths=None):
  394. """
  395. Provides an iterator that looks for ``.dist-info`` directories in
  396. ``sys.path`` and returns :class:`Distribution` instances for each one of
  397. them. If the parameters *use_egg_info* is ``True``, then the ``.egg-info``
  398. files and directores are iterated as well.
  399. :rtype: iterator of :class:`Distribution` and :class:`EggInfoDistribution`
  400. instances
  401. """
  402. if paths is None:
  403. paths = sys.path
  404. if not _cache_enabled:
  405. for dist in _yield_distributions(True, use_egg_info, paths):
  406. yield dist
  407. else:
  408. _generate_cache(use_egg_info, paths)
  409. for dist in _cache_path.values():
  410. yield dist
  411. if use_egg_info:
  412. for dist in _cache_path_egg.values():
  413. yield dist
  414. def get_distribution(name, use_egg_info=False, paths=None):
  415. """
  416. Scans all elements in ``sys.path`` and looks for all directories
  417. ending with ``.dist-info``. Returns a :class:`Distribution`
  418. corresponding to the ``.dist-info`` directory that contains the
  419. ``METADATA`` that matches *name* for the *name* metadata field.
  420. If no distribution exists with the given *name* and the parameter
  421. *use_egg_info* is set to ``True``, then all files and directories ending
  422. with ``.egg-info`` are scanned. A :class:`EggInfoDistribution` instance is
  423. returned if one is found that has metadata that matches *name* for the
  424. *name* metadata field.
  425. This function only returns the first result found, as no more than one
  426. value is expected. If the directory is not found, ``None`` is returned.
  427. :rtype: :class:`Distribution` or :class:`EggInfoDistribution` or None
  428. """
  429. if paths is None:
  430. paths = sys.path
  431. if not _cache_enabled:
  432. for dist in _yield_distributions(True, use_egg_info, paths):
  433. if dist.name == name:
  434. return dist
  435. else:
  436. _generate_cache(use_egg_info, paths)
  437. if name in _cache_name:
  438. return _cache_name[name][0]
  439. elif use_egg_info and name in _cache_name_egg:
  440. return _cache_name_egg[name][0]
  441. else:
  442. return None
  443. def obsoletes_distribution(name, version=None, use_egg_info=False):
  444. """
  445. Iterates over all distributions to find which distributions obsolete
  446. *name*.
  447. If a *version* is provided, it will be used to filter the results.
  448. If the argument *use_egg_info* is set to ``True``, then ``.egg-info``
  449. distributions will be considered as well.
  450. :type name: string
  451. :type version: string
  452. :parameter name:
  453. """
  454. for dist in get_distributions(use_egg_info):
  455. obsoleted = (dist.metadata['Obsoletes-Dist'] +
  456. dist.metadata['Obsoletes'])
  457. for obs in obsoleted:
  458. o_components = obs.split(' ', 1)
  459. if len(o_components) == 1 or version is None:
  460. if name == o_components[0]:
  461. yield dist
  462. break
  463. else:
  464. try:
  465. predicate = VersionPredicate(obs)
  466. except ValueError:
  467. raise PackagingError(
  468. 'distribution %r has ill-formed obsoletes field: '
  469. '%r' % (dist.name, obs))
  470. if name == o_components[0] and predicate.match(version):
  471. yield dist
  472. break
  473. def provides_distribution(name, version=None, use_egg_info=False):
  474. """
  475. Iterates over all distributions to find which distributions provide *name*.
  476. If a *version* is provided, it will be used to filter the results. Scans
  477. all elements in ``sys.path`` and looks for all directories ending with
  478. ``.dist-info``. Returns a :class:`Distribution` corresponding to the
  479. ``.dist-info`` directory that contains a ``METADATA`` that matches *name*
  480. for the name metadata. If the argument *use_egg_info* is set to ``True``,
  481. then all files and directories ending with ``.egg-info`` are considered
  482. as well and returns an :class:`EggInfoDistribution` instance.
  483. This function only returns the first result found, since no more than
  484. one values are expected. If the directory is not found, returns ``None``.
  485. :parameter version: a version specifier that indicates the version
  486. required, conforming to the format in ``PEP-345``
  487. :type name: string
  488. :type version: string
  489. """
  490. predicate = None
  491. if not version is None:
  492. try:
  493. predicate = VersionPredicate(name + ' (' + version + ')')
  494. except ValueError:
  495. raise PackagingError('invalid name or version: %r, %r' %
  496. (name, version))
  497. for dist in get_distributions(use_egg_info):
  498. provided = dist.metadata['Provides-Dist'] + dist.metadata['Provides']
  499. for p in provided:
  500. p_components = p.rsplit(' ', 1)
  501. if len(p_components) == 1 or predicate is None:
  502. if name == p_components[0]:
  503. yield dist
  504. break
  505. else:
  506. p_name, p_ver = p_components
  507. if len(p_ver) < 2 or p_ver[0] != '(' or p_ver[-1] != ')':
  508. raise PackagingError(
  509. 'distribution %r has invalid Provides field: %r' %
  510. (dist.name, p))
  511. p_ver = p_ver[1:-1] # trim off the parenthesis
  512. if p_name == name and predicate.match(p_ver):
  513. yield dist
  514. break
  515. def get_file_users(path):
  516. """
  517. Iterates over all distributions to find out which distributions use
  518. *path*.
  519. :parameter path: can be a local absolute path or a relative
  520. ``'/'``-separated path.
  521. :type path: string
  522. :rtype: iterator of :class:`Distribution` instances
  523. """
  524. for dist in get_distributions():
  525. if dist.uses(path):
  526. yield dist
  527. def get_file_path(distribution_name, relative_path):
  528. """Return the path to a resource file."""
  529. dist = get_distribution(distribution_name)
  530. if dist is not None:
  531. return dist.get_resource_path(relative_path)
  532. raise LookupError('no distribution named %r found' % distribution_name)
  533. def get_file(distribution_name, relative_path, *args, **kwargs):
  534. """Open and return a resource file."""
  535. return open(get_file_path(distribution_name, relative_path),
  536. *args, **kwargs)