cache.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. """Cache Management
  2. """
  3. # The following comment should be removed at some point in the future.
  4. # mypy: strict-optional=False
  5. import errno
  6. import hashlib
  7. import logging
  8. import os
  9. from pip._vendor.packaging.utils import canonicalize_name
  10. from pip._internal.models.link import Link
  11. from pip._internal.utils.compat import expanduser
  12. from pip._internal.utils.temp_dir import TempDirectory
  13. from pip._internal.utils.typing import MYPY_CHECK_RUNNING
  14. from pip._internal.utils.urls import path_to_url
  15. from pip._internal.wheel import InvalidWheelFilename, Wheel
  16. if MYPY_CHECK_RUNNING:
  17. from typing import Optional, Set, List, Any
  18. from pip._internal.index import FormatControl
  19. from pip._internal.pep425tags import Pep425Tag
  20. logger = logging.getLogger(__name__)
  21. class Cache(object):
  22. """An abstract class - provides cache directories for data from links
  23. :param cache_dir: The root of the cache.
  24. :param format_control: An object of FormatControl class to limit
  25. binaries being read from the cache.
  26. :param allowed_formats: which formats of files the cache should store.
  27. ('binary' and 'source' are the only allowed values)
  28. """
  29. def __init__(self, cache_dir, format_control, allowed_formats):
  30. # type: (str, FormatControl, Set[str]) -> None
  31. super(Cache, self).__init__()
  32. self.cache_dir = expanduser(cache_dir) if cache_dir else None
  33. self.format_control = format_control
  34. self.allowed_formats = allowed_formats
  35. _valid_formats = {"source", "binary"}
  36. assert self.allowed_formats.union(_valid_formats) == _valid_formats
  37. def _get_cache_path_parts(self, link):
  38. # type: (Link) -> List[str]
  39. """Get parts of part that must be os.path.joined with cache_dir
  40. """
  41. # We want to generate an url to use as our cache key, we don't want to
  42. # just re-use the URL because it might have other items in the fragment
  43. # and we don't care about those.
  44. key_parts = [link.url_without_fragment]
  45. if link.hash_name is not None and link.hash is not None:
  46. key_parts.append("=".join([link.hash_name, link.hash]))
  47. key_url = "#".join(key_parts)
  48. # Encode our key url with sha224, we'll use this because it has similar
  49. # security properties to sha256, but with a shorter total output (and
  50. # thus less secure). However the differences don't make a lot of
  51. # difference for our use case here.
  52. hashed = hashlib.sha224(key_url.encode()).hexdigest()
  53. # We want to nest the directories some to prevent having a ton of top
  54. # level directories where we might run out of sub directories on some
  55. # FS.
  56. parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]]
  57. return parts
  58. def _get_candidates(self, link, package_name):
  59. # type: (Link, Optional[str]) -> List[Any]
  60. can_not_cache = (
  61. not self.cache_dir or
  62. not package_name or
  63. not link
  64. )
  65. if can_not_cache:
  66. return []
  67. canonical_name = canonicalize_name(package_name)
  68. formats = self.format_control.get_allowed_formats(
  69. canonical_name
  70. )
  71. if not self.allowed_formats.intersection(formats):
  72. return []
  73. root = self.get_path_for_link(link)
  74. try:
  75. return os.listdir(root)
  76. except OSError as err:
  77. if err.errno in {errno.ENOENT, errno.ENOTDIR}:
  78. return []
  79. raise
  80. def get_path_for_link(self, link):
  81. # type: (Link) -> str
  82. """Return a directory to store cached items in for link.
  83. """
  84. raise NotImplementedError()
  85. def get(
  86. self,
  87. link, # type: Link
  88. package_name, # type: Optional[str]
  89. supported_tags, # type: List[Pep425Tag]
  90. ):
  91. # type: (...) -> Link
  92. """Returns a link to a cached item if it exists, otherwise returns the
  93. passed link.
  94. """
  95. raise NotImplementedError()
  96. def _link_for_candidate(self, link, candidate):
  97. # type: (Link, str) -> Link
  98. root = self.get_path_for_link(link)
  99. path = os.path.join(root, candidate)
  100. return Link(path_to_url(path))
  101. def cleanup(self):
  102. # type: () -> None
  103. pass
  104. class SimpleWheelCache(Cache):
  105. """A cache of wheels for future installs.
  106. """
  107. def __init__(self, cache_dir, format_control):
  108. # type: (str, FormatControl) -> None
  109. super(SimpleWheelCache, self).__init__(
  110. cache_dir, format_control, {"binary"}
  111. )
  112. def get_path_for_link(self, link):
  113. # type: (Link) -> str
  114. """Return a directory to store cached wheels for link
  115. Because there are M wheels for any one sdist, we provide a directory
  116. to cache them in, and then consult that directory when looking up
  117. cache hits.
  118. We only insert things into the cache if they have plausible version
  119. numbers, so that we don't contaminate the cache with things that were
  120. not unique. E.g. ./package might have dozens of installs done for it
  121. and build a version of 0.0...and if we built and cached a wheel, we'd
  122. end up using the same wheel even if the source has been edited.
  123. :param link: The link of the sdist for which this will cache wheels.
  124. """
  125. parts = self._get_cache_path_parts(link)
  126. # Store wheels within the root cache_dir
  127. return os.path.join(self.cache_dir, "wheels", *parts)
  128. def get(
  129. self,
  130. link, # type: Link
  131. package_name, # type: Optional[str]
  132. supported_tags, # type: List[Pep425Tag]
  133. ):
  134. # type: (...) -> Link
  135. candidates = []
  136. for wheel_name in self._get_candidates(link, package_name):
  137. try:
  138. wheel = Wheel(wheel_name)
  139. except InvalidWheelFilename:
  140. continue
  141. if not wheel.supported(supported_tags):
  142. # Built for a different python/arch/etc
  143. continue
  144. candidates.append(
  145. (wheel.support_index_min(supported_tags), wheel_name)
  146. )
  147. if not candidates:
  148. return link
  149. return self._link_for_candidate(link, min(candidates)[1])
  150. class EphemWheelCache(SimpleWheelCache):
  151. """A SimpleWheelCache that creates it's own temporary cache directory
  152. """
  153. def __init__(self, format_control):
  154. # type: (FormatControl) -> None
  155. self._temp_dir = TempDirectory(kind="ephem-wheel-cache")
  156. super(EphemWheelCache, self).__init__(
  157. self._temp_dir.path, format_control
  158. )
  159. def cleanup(self):
  160. # type: () -> None
  161. self._temp_dir.cleanup()
  162. class WheelCache(Cache):
  163. """Wraps EphemWheelCache and SimpleWheelCache into a single Cache
  164. This Cache allows for gracefully degradation, using the ephem wheel cache
  165. when a certain link is not found in the simple wheel cache first.
  166. """
  167. def __init__(self, cache_dir, format_control):
  168. # type: (str, FormatControl) -> None
  169. super(WheelCache, self).__init__(
  170. cache_dir, format_control, {'binary'}
  171. )
  172. self._wheel_cache = SimpleWheelCache(cache_dir, format_control)
  173. self._ephem_cache = EphemWheelCache(format_control)
  174. def get_path_for_link(self, link):
  175. # type: (Link) -> str
  176. return self._wheel_cache.get_path_for_link(link)
  177. def get_ephem_path_for_link(self, link):
  178. # type: (Link) -> str
  179. return self._ephem_cache.get_path_for_link(link)
  180. def get(
  181. self,
  182. link, # type: Link
  183. package_name, # type: Optional[str]
  184. supported_tags, # type: List[Pep425Tag]
  185. ):
  186. # type: (...) -> Link
  187. retval = self._wheel_cache.get(
  188. link=link,
  189. package_name=package_name,
  190. supported_tags=supported_tags,
  191. )
  192. if retval is not link:
  193. return retval
  194. return self._ephem_cache.get(
  195. link=link,
  196. package_name=package_name,
  197. supported_tags=supported_tags,
  198. )
  199. def cleanup(self):
  200. # type: () -> None
  201. self._wheel_cache.cleanup()
  202. self._ephem_cache.cleanup()