  1. """
  2. Requirements file parsing
  3. """
  4. # The following comment should be removed at some point in the future.
  5. # mypy: strict-optional=False
  6. from __future__ import absolute_import
  7. import optparse
  8. import os
  9. import re
  10. import shlex
  11. import sys
  12. from pip._vendor.six.moves import filterfalse
  13. from pip._vendor.six.moves.urllib import parse as urllib_parse
  14. from pip._internal.cli import cmdoptions
  15. from pip._internal.download import get_file_content
  16. from pip._internal.exceptions import RequirementsFileParseError
  17. from pip._internal.models.search_scope import SearchScope
  18. from pip._internal.req.constructors import (
  19. install_req_from_editable,
  20. install_req_from_line,
  21. )
  22. from pip._internal.utils.typing import MYPY_CHECK_RUNNING
  23. if MYPY_CHECK_RUNNING:
  24. from typing import (
  25. Any, Callable, Iterator, List, NoReturn, Optional, Text, Tuple,
  26. )
  27. from pip._internal.req import InstallRequirement
  28. from pip._internal.cache import WheelCache
  29. from pip._internal.index import PackageFinder
  30. from pip._internal.network.session import PipSession
  31. ReqFileLines = Iterator[Tuple[int, Text]]
  32. __all__ = ['parse_requirements']
  33. SCHEME_RE = re.compile(r'^(http|https|file):', re.I)
  34. COMMENT_RE = re.compile(r'(^|\s+)#.*$')
  35. # Matches environment variable-style values in '${MY_VARIABLE_1}' with the
  36. # variable name consisting of only uppercase letters, digits or the '_'
  37. # (underscore). This follows the POSIX standard defined in IEEE Std 1003.1,
  38. # 2013 Edition.
  39. ENV_VAR_RE = re.compile(r'(?P<var>\$\{(?P<name>[A-Z0-9_]+)\})')
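# For example, ENV_VAR_RE.findall('https://${PYPI_HOST}/simple') returns
# [('${PYPI_HOST}', 'PYPI_HOST')], while '$PYPI_HOST' and '${pypi_host}' do
# not match, because only the braced, upper-case form is recognised.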

SUPPORTED_OPTIONS = [
    cmdoptions.constraints,
    cmdoptions.editable,
    cmdoptions.requirements,
    cmdoptions.no_index,
    cmdoptions.index_url,
    cmdoptions.find_links,
    cmdoptions.extra_index_url,
    cmdoptions.always_unzip,
    cmdoptions.no_binary,
    cmdoptions.only_binary,
    cmdoptions.pre,
    cmdoptions.trusted_host,
    cmdoptions.require_hashes,
]  # type: List[Callable[..., optparse.Option]]

# options to be passed to requirements
SUPPORTED_OPTIONS_REQ = [
    cmdoptions.install_options,
    cmdoptions.global_options,
    cmdoptions.hash,
]  # type: List[Callable[..., optparse.Option]]

# the 'dest' string values
SUPPORTED_OPTIONS_REQ_DEST = [str(o().dest) for o in SUPPORTED_OPTIONS_REQ]


def parse_requirements(
    filename,  # type: str
    finder=None,  # type: Optional[PackageFinder]
    comes_from=None,  # type: Optional[str]
    options=None,  # type: Optional[optparse.Values]
    session=None,  # type: Optional[PipSession]
    constraint=False,  # type: bool
    wheel_cache=None,  # type: Optional[WheelCache]
    use_pep517=None  # type: Optional[bool]
):
    # type: (...) -> Iterator[InstallRequirement]
    """Parse a requirements file and yield InstallRequirement instances.

    :param filename:    Path or URL of the requirements file.
    :param finder:      Instance of pip.index.PackageFinder.
    :param comes_from:  Origin description of requirements.
    :param options:     cli options.
    :param session:     Instance of pip.download.PipSession.
    :param constraint:  If true, parsing a constraint file rather than
        a requirements file.
    :param wheel_cache: Instance of pip.wheel.WheelCache
    :param use_pep517:  Value of the --use-pep517 option.
    """
    if session is None:
        raise TypeError(
            "parse_requirements() missing 1 required keyword argument: "
            "'session'"
        )

    _, content = get_file_content(
        filename, comes_from=comes_from, session=session
    )

    lines_enum = preprocess(content, options)

    for line_number, line in lines_enum:
        req_iter = process_line(line, filename, line_number, finder,
                                comes_from, options, session, wheel_cache,
                                use_pep517=use_pep517, constraint=constraint)
        for req in req_iter:
            yield req
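
# Example usage, as a minimal sketch (the file name is illustrative; note
# that a session must always be supplied explicitly):
#
#     from pip._internal.network.session import PipSession
#
#     for req in parse_requirements('requirements.txt', session=PipSession()):
#         print(req)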


def preprocess(content, options):
    # type: (Text, Optional[optparse.Values]) -> ReqFileLines
    """Split, filter, and join lines, and return a line iterator

    :param content: the content of the requirements file
    :param options: cli options
    """
    lines_enum = enumerate(content.splitlines(), start=1)  # type: ReqFileLines
    lines_enum = join_lines(lines_enum)
    lines_enum = ignore_comments(lines_enum)
    lines_enum = skip_regex(lines_enum, options)
    lines_enum = expand_env_variables(lines_enum)
    return lines_enum
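
# Illustrative example of the stages above: for the content
#
#     # dev tools
#     flake8
#     requests==${REQUESTS_VERSION}
#
# preprocess() yields (2, 'flake8') and, assuming REQUESTS_VERSION is set to
# 2.24.0 in the environment, (3, 'requests==2.24.0'); the comment-only line
# is dropped by ignore_comments().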


def process_line(
    line,  # type: Text
    filename,  # type: str
    line_number,  # type: int
    finder=None,  # type: Optional[PackageFinder]
    comes_from=None,  # type: Optional[str]
    options=None,  # type: Optional[optparse.Values]
    session=None,  # type: Optional[PipSession]
    wheel_cache=None,  # type: Optional[WheelCache]
    use_pep517=None,  # type: Optional[bool]
    constraint=False,  # type: bool
):
    # type: (...) -> Iterator[InstallRequirement]
    """Process a single requirements line; this can result in creating/yielding
    requirements, or updating the finder.

    For lines that contain requirements, the only options that have an effect
    are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
    requirement. Other options from SUPPORTED_OPTIONS may be present, but are
    ignored.

    For lines that do not contain requirements, the only options that have an
    effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
    be present, but are ignored. These lines may contain multiple options
    (although our docs imply only one is supported), and all are parsed and
    affect the finder.

    :param constraint: If True, parsing a constraints file.
    :param options: OptionParser options that we may update
    """
    parser = build_parser(line)
    defaults = parser.get_default_values()
    defaults.index_url = None
    if finder:
        defaults.format_control = finder.format_control
    args_str, options_str = break_args_options(line)
    # Prior to 2.7.3, shlex cannot deal with unicode entries
    if sys.version_info < (2, 7, 3):
        # https://github.com/python/mypy/issues/1174
        options_str = options_str.encode('utf8')  # type: ignore

    # https://github.com/python/mypy/issues/1174
    opts, _ = parser.parse_args(
        shlex.split(options_str), defaults)  # type: ignore

    # preserve for the nested code path
    line_comes_from = '%s %s (line %s)' % (
        '-c' if constraint else '-r', filename, line_number,
    )

    # yield a line requirement
    if args_str:
        isolated = options.isolated_mode if options else False
        if options:
            cmdoptions.check_install_build_global(options, opts)
        # get the options that apply to requirements
        req_options = {}
        for dest in SUPPORTED_OPTIONS_REQ_DEST:
            if dest in opts.__dict__ and opts.__dict__[dest]:
                req_options[dest] = opts.__dict__[dest]
        line_source = 'line {} of {}'.format(line_number, filename)
        yield install_req_from_line(
            args_str,
            comes_from=line_comes_from,
            use_pep517=use_pep517,
            isolated=isolated,
            options=req_options,
            wheel_cache=wheel_cache,
            constraint=constraint,
            line_source=line_source,
        )

    # yield an editable requirement
    elif opts.editables:
        isolated = options.isolated_mode if options else False
        yield install_req_from_editable(
            opts.editables[0], comes_from=line_comes_from,
            use_pep517=use_pep517,
            constraint=constraint, isolated=isolated, wheel_cache=wheel_cache
        )

    # parse a nested requirements file
    elif opts.requirements or opts.constraints:
        if opts.requirements:
            req_path = opts.requirements[0]
            nested_constraint = False
        else:
            req_path = opts.constraints[0]
            nested_constraint = True
        # original file is over http
        if SCHEME_RE.search(filename):
            # do a url join so relative paths work
            req_path = urllib_parse.urljoin(filename, req_path)
        # original file and nested file are paths
        elif not SCHEME_RE.search(req_path):
            # do a join so relative paths work
            req_path = os.path.join(os.path.dirname(filename), req_path)
        # TODO: Why not use `comes_from='-r {} (line {})'` here as well?
        parsed_reqs = parse_requirements(
            req_path, finder, comes_from, options, session,
            constraint=nested_constraint, wheel_cache=wheel_cache
        )
        for req in parsed_reqs:
            yield req

    # percolate hash-checking option upward
    elif opts.require_hashes:
        options.require_hashes = opts.require_hashes

    # set finder options
    elif finder:
        find_links = finder.find_links
        index_urls = finder.index_urls
        if opts.index_url:
            index_urls = [opts.index_url]
        if opts.no_index is True:
            index_urls = []
        if opts.extra_index_urls:
            index_urls.extend(opts.extra_index_urls)
        if opts.find_links:
            # FIXME: it would be nice to keep track of the source
            # of the find_links: support a find-links local path
            # relative to a requirements file.
            value = opts.find_links[0]
            req_dir = os.path.dirname(os.path.abspath(filename))
            relative_to_reqs_file = os.path.join(req_dir, value)
            if os.path.exists(relative_to_reqs_file):
                value = relative_to_reqs_file
            find_links.append(value)

        search_scope = SearchScope(
            find_links=find_links,
            index_urls=index_urls,
        )
        finder.search_scope = search_scope

        if opts.pre:
            finder.set_allow_all_prereleases()

        for host in opts.trusted_hosts or []:
            source = 'line {} of {}'.format(line_number, filename)
            session.add_trusted_host(host, source=source)
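
# For illustration, the lines below (from a hypothetical requirements file)
# exercise the main branches above:
#
#     requests>=2.8.1 --hash=sha256:<digest>    -> yields an InstallRequirement
#                                                  with a per-requirement option
#     -e ./local/project                        -> yields an editable requirement
#     -r other-requirements.txt                 -> recurses into the nested file
#     --extra-index-url https://pkgs.example.com/simple
#                                               -> updates the finder's search scope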


def break_args_options(line):
    # type: (Text) -> Tuple[str, Text]
    """Break up the line into an args and options string. We only want to shlex
    (and then optparse) the options, not the args. args can contain markers
    which are corrupted by shlex.
    """
    tokens = line.split(' ')
    args = []
    options = tokens[:]
    for token in tokens:
        if token.startswith('-') or token.startswith('--'):
            break
        else:
            args.append(token)
            options.pop(0)
    return ' '.join(args), ' '.join(options)  # type: ignore
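
# For example (an illustrative line, not taken from a real file):
#
#     break_args_options('requests [security] >= 2.8.1 --hash=sha256:abcd')
#
# returns ('requests [security] >= 2.8.1', '--hash=sha256:abcd'): everything
# up to the first token starting with '-' is treated as the requirement args,
# the rest as options.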


def build_parser(line):
    # type: (Text) -> optparse.OptionParser
    """
    Return a parser for parsing requirement lines
    """
    parser = optparse.OptionParser(add_help_option=False)

    option_factories = SUPPORTED_OPTIONS + SUPPORTED_OPTIONS_REQ
    for option_factory in option_factories:
        option = option_factory()
        parser.add_option(option)

    # By default optparse sys.exits on parsing errors. We want to wrap
    # that in our own exception.
    def parser_exit(self, msg):
        # type: (Any, str) -> NoReturn
        # add offending line
        msg = 'Invalid requirement: %s\n%s' % (line, msg)
        raise RequirementsFileParseError(msg)
    # NOTE: mypy disallows assigning to a method
    #       https://github.com/python/mypy/issues/2427
    parser.exit = parser_exit  # type: ignore

    return parser


def join_lines(lines_enum):
    # type: (ReqFileLines) -> ReqFileLines
    """Joins a line ending in '\' with the previous line (except when following
    comments). The joined line takes on the index of the first line.
    """
    primary_line_number = None
    new_line = []  # type: List[Text]
    for line_number, line in lines_enum:
        if not line.endswith('\\') or COMMENT_RE.match(line):
            if COMMENT_RE.match(line):
                # this ensures comments are always matched later
                line = ' ' + line
            if new_line:
                new_line.append(line)
                yield primary_line_number, ''.join(new_line)
                new_line = []
            else:
                yield line_number, line
        else:
            if not new_line:
                primary_line_number = line_number
            new_line.append(line.strip('\\'))

    # last line contains \
    if new_line:
        yield primary_line_number, ''.join(new_line)

    # TODO: handle space after '\'.
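
# For example, the two physical lines
#
#     Django>=2.0,\
#     <3.0
#
# are joined into the single logical line (1, 'Django>=2.0,<3.0'), keeping
# the line number of the first physical line.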


def ignore_comments(lines_enum):
    # type: (ReqFileLines) -> ReqFileLines
    """
    Strips comments and filters empty lines.
    """
    for line_number, line in lines_enum:
        line = COMMENT_RE.sub('', line)
        line = line.strip()
        if line:
            yield line_number, line
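
# For example, (1, 'requests  # pinned later') becomes (1, 'requests'), and a
# comment-only or blank line is dropped entirely.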


def skip_regex(lines_enum, options):
    # type: (ReqFileLines, Optional[optparse.Values]) -> ReqFileLines
    """
    Skip lines that match the '--skip-requirements-regex' pattern

    Note: the regex pattern is only built once
    """
    skip_regex = options.skip_requirements_regex if options else None
    if skip_regex:
        pattern = re.compile(skip_regex)
        lines_enum = filterfalse(lambda e: pattern.search(e[1]), lines_enum)
    return lines_enum


def expand_env_variables(lines_enum):
    # type: (ReqFileLines) -> ReqFileLines
    """Replace all environment variables that can be retrieved via `os.getenv`.

    The only allowed format for environment variables defined in the
    requirement file is `${MY_VARIABLE_1}` to ensure two things:

    1. Strings that contain a `$` aren't accidentally (partially) expanded.
    2. Ensure consistency across platforms for requirement files.

    These points are the result of a discussion on the `github pull
    request #3514 <https://github.com/pypa/pip/pull/3514>`_.

    Valid characters in variable names follow the `POSIX standard
    <http://pubs.opengroup.org/onlinepubs/9699919799/>`_ and are limited
    to uppercase letters, digits and the `_` (underscore).
    """
    for line_number, line in lines_enum:
        for env_var, var_name in ENV_VAR_RE.findall(line):
            value = os.getenv(var_name)
            if not value:
                continue

            line = line.replace(env_var, value)

        yield line_number, line
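
# For example, given a line such as
#
#     https://${INDEX_USER}:${INDEX_TOKEN}@pkgs.example.com/simple
#
# each ${...} reference is replaced with the corresponding environment
# variable's value; references to unset (or empty) variables are left in the
# line unchanged. The host name above is purely illustrative.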