session.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
  1. """PipSession and supporting code, containing all pip-specific
  2. network request configuration and behavior.
  3. """
  4. # When mypy runs on Windows the call to distro.linux_distribution() is skipped
  5. # resulting in the failure:
  6. #
  7. # error: unused 'type: ignore' comment
  8. #
  9. # If the upstream module adds typing, this comment should be removed. See
  10. # https://github.com/nir0s/distro/pull/269
  11. #
  12. # mypy: warn-unused-ignores=False
  13. import email.utils
  14. import ipaddress
  15. import json
  16. import logging
  17. import mimetypes
  18. import os
  19. import platform
  20. import sys
  21. import urllib.parse
  22. import warnings
  23. from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple, Union
  24. from pip._vendor import requests, urllib3
  25. from pip._vendor.cachecontrol import CacheControlAdapter
  26. from pip._vendor.requests.adapters import BaseAdapter, HTTPAdapter
  27. from pip._vendor.requests.models import PreparedRequest, Response
  28. from pip._vendor.requests.structures import CaseInsensitiveDict
  29. from pip._vendor.urllib3.connectionpool import ConnectionPool
  30. from pip._vendor.urllib3.exceptions import InsecureRequestWarning
  31. from pip import __version__
  32. from pip._internal.metadata import get_default_environment
  33. from pip._internal.models.link import Link
  34. from pip._internal.network.auth import MultiDomainBasicAuth
  35. from pip._internal.network.cache import SafeFileCache
  36. # Import ssl from compat so the initial import occurs in only one place.
  37. from pip._internal.utils.compat import has_tls
  38. from pip._internal.utils.glibc import libc_ver
  39. from pip._internal.utils.misc import build_url_from_netloc, parse_netloc
  40. from pip._internal.utils.urls import url_to_path
  41. logger = logging.getLogger(__name__)
  42. SecureOrigin = Tuple[str, str, Optional[Union[int, str]]]
  43. # Ignore warning raised when using --trusted-host.
  44. warnings.filterwarnings("ignore", category=InsecureRequestWarning)
  45. SECURE_ORIGINS = [
  46. # protocol, hostname, port
  47. # Taken from Chrome's list of secure origins (See: http://bit.ly/1qrySKC)
  48. ("https", "*", "*"),
  49. ("*", "localhost", "*"),
  50. ("*", "127.0.0.0/8", "*"),
  51. ("*", "::1/128", "*"),
  52. ("file", "*", None),
  53. # ssh is always secure.
  54. ("ssh", "*", "*"),
  55. ] # type: List[SecureOrigin]
  56. # These are environment variables present when running under various
  57. # CI systems. For each variable, some CI systems that use the variable
  58. # are indicated. The collection was chosen so that for each of a number
  59. # of popular systems, at least one of the environment variables is used.
  60. # This list is used to provide some indication of and lower bound for
  61. # CI traffic to PyPI. Thus, it is okay if the list is not comprehensive.
  62. # For more background, see: https://github.com/pypa/pip/issues/5499
  63. CI_ENVIRONMENT_VARIABLES = (
  64. # Azure Pipelines
  65. 'BUILD_BUILDID',
  66. # Jenkins
  67. 'BUILD_ID',
  68. # AppVeyor, CircleCI, Codeship, Gitlab CI, Shippable, Travis CI
  69. 'CI',
  70. # Explicit environment variable.
  71. 'PIP_IS_CI',
  72. )
  73. def looks_like_ci():
  74. # type: () -> bool
  75. """
  76. Return whether it looks like pip is running under CI.
  77. """
  78. # We don't use the method of checking for a tty (e.g. using isatty())
  79. # because some CI systems mimic a tty (e.g. Travis CI). Thus that
  80. # method doesn't provide definitive information in either direction.
  81. return any(name in os.environ for name in CI_ENVIRONMENT_VARIABLES)
  82. def user_agent():
  83. # type: () -> str
  84. """
  85. Return a string representing the user agent.
  86. """
  87. data = {
  88. "installer": {"name": "pip", "version": __version__},
  89. "python": platform.python_version(),
  90. "implementation": {
  91. "name": platform.python_implementation(),
  92. },
  93. } # type: Dict[str, Any]
  94. if data["implementation"]["name"] == 'CPython':
  95. data["implementation"]["version"] = platform.python_version()
  96. elif data["implementation"]["name"] == 'PyPy':
  97. pypy_version_info = sys.pypy_version_info # type: ignore
  98. if pypy_version_info.releaselevel == 'final':
  99. pypy_version_info = pypy_version_info[:3]
  100. data["implementation"]["version"] = ".".join(
  101. [str(x) for x in pypy_version_info]
  102. )
  103. elif data["implementation"]["name"] == 'Jython':
  104. # Complete Guess
  105. data["implementation"]["version"] = platform.python_version()
  106. elif data["implementation"]["name"] == 'IronPython':
  107. # Complete Guess
  108. data["implementation"]["version"] = platform.python_version()
  109. if sys.platform.startswith("linux"):
  110. from pip._vendor import distro
  111. # https://github.com/nir0s/distro/pull/269
  112. linux_distribution = distro.linux_distribution() # type: ignore
  113. distro_infos = dict(filter(
  114. lambda x: x[1],
  115. zip(["name", "version", "id"], linux_distribution),
  116. ))
  117. libc = dict(filter(
  118. lambda x: x[1],
  119. zip(["lib", "version"], libc_ver()),
  120. ))
  121. if libc:
  122. distro_infos["libc"] = libc
  123. if distro_infos:
  124. data["distro"] = distro_infos
  125. if sys.platform.startswith("darwin") and platform.mac_ver()[0]:
  126. data["distro"] = {"name": "macOS", "version": platform.mac_ver()[0]}
  127. if platform.system():
  128. data.setdefault("system", {})["name"] = platform.system()
  129. if platform.release():
  130. data.setdefault("system", {})["release"] = platform.release()
  131. if platform.machine():
  132. data["cpu"] = platform.machine()
  133. if has_tls():
  134. import _ssl as ssl
  135. data["openssl_version"] = ssl.OPENSSL_VERSION
  136. setuptools_dist = get_default_environment().get_distribution("setuptools")
  137. if setuptools_dist is not None:
  138. data["setuptools_version"] = str(setuptools_dist.version)
  139. # Use None rather than False so as not to give the impression that
  140. # pip knows it is not being run under CI. Rather, it is a null or
  141. # inconclusive result. Also, we include some value rather than no
  142. # value to make it easier to know that the check has been run.
  143. data["ci"] = True if looks_like_ci() else None
  144. user_data = os.environ.get("PIP_USER_AGENT_USER_DATA")
  145. if user_data is not None:
  146. data["user_data"] = user_data
  147. return "{data[installer][name]}/{data[installer][version]} {json}".format(
  148. data=data,
  149. json=json.dumps(data, separators=(",", ":"), sort_keys=True),
  150. )
  151. class LocalFSAdapter(BaseAdapter):
  152. def send(
  153. self,
  154. request, # type: PreparedRequest
  155. stream=False, # type: bool
  156. timeout=None, # type: Optional[Union[float, Tuple[float, float]]]
  157. verify=True, # type: Union[bool, str]
  158. cert=None, # type: Optional[Union[str, Tuple[str, str]]]
  159. proxies=None, # type:Optional[Mapping[str, str]]
  160. ):
  161. # type: (...) -> Response
  162. pathname = url_to_path(request.url)
  163. resp = Response()
  164. resp.status_code = 200
  165. resp.url = request.url
  166. try:
  167. stats = os.stat(pathname)
  168. except OSError as exc:
  169. resp.status_code = 404
  170. resp.raw = exc
  171. else:
  172. modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
  173. content_type = mimetypes.guess_type(pathname)[0] or "text/plain"
  174. resp.headers = CaseInsensitiveDict({
  175. "Content-Type": content_type,
  176. "Content-Length": stats.st_size,
  177. "Last-Modified": modified,
  178. })
  179. resp.raw = open(pathname, "rb")
  180. resp.close = resp.raw.close
  181. return resp
  182. def close(self):
  183. # type: () -> None
  184. pass
  185. class InsecureHTTPAdapter(HTTPAdapter):
  186. def cert_verify(
  187. self,
  188. conn, # type: ConnectionPool
  189. url, # type: str
  190. verify, # type: Union[bool, str]
  191. cert, # type: Optional[Union[str, Tuple[str, str]]]
  192. ):
  193. # type: (...) -> None
  194. super().cert_verify(conn=conn, url=url, verify=False, cert=cert)
  195. class InsecureCacheControlAdapter(CacheControlAdapter):
  196. def cert_verify(
  197. self,
  198. conn, # type: ConnectionPool
  199. url, # type: str
  200. verify, # type: Union[bool, str]
  201. cert, # type: Optional[Union[str, Tuple[str, str]]]
  202. ):
  203. # type: (...) -> None
  204. super().cert_verify(conn=conn, url=url, verify=False, cert=cert)
  205. class PipSession(requests.Session):
  206. timeout = None # type: Optional[int]
  207. def __init__(
  208. self,
  209. *args, # type: Any
  210. retries=0, # type: int
  211. cache=None, # type: Optional[str]
  212. trusted_hosts=(), # type: Sequence[str]
  213. index_urls=None, # type: Optional[List[str]]
  214. **kwargs, # type: Any
  215. ):
  216. # type: (...) -> None
  217. """
  218. :param trusted_hosts: Domains not to emit warnings for when not using
  219. HTTPS.
  220. """
  221. super().__init__(*args, **kwargs)
  222. # Namespace the attribute with "pip_" just in case to prevent
  223. # possible conflicts with the base class.
  224. self.pip_trusted_origins = [] # type: List[Tuple[str, Optional[int]]]
  225. # Attach our User Agent to the request
  226. self.headers["User-Agent"] = user_agent()
  227. # Attach our Authentication handler to the session
  228. self.auth = MultiDomainBasicAuth(index_urls=index_urls)
  229. # Create our urllib3.Retry instance which will allow us to customize
  230. # how we handle retries.
  231. retries = urllib3.Retry(
  232. # Set the total number of retries that a particular request can
  233. # have.
  234. total=retries,
  235. # A 503 error from PyPI typically means that the Fastly -> Origin
  236. # connection got interrupted in some way. A 503 error in general
  237. # is typically considered a transient error so we'll go ahead and
  238. # retry it.
  239. # A 500 may indicate transient error in Amazon S3
  240. # A 520 or 527 - may indicate transient error in CloudFlare
  241. status_forcelist=[500, 503, 520, 527],
  242. # Add a small amount of back off between failed requests in
  243. # order to prevent hammering the service.
  244. backoff_factor=0.25,
  245. ) # type: ignore
  246. # Our Insecure HTTPAdapter disables HTTPS validation. It does not
  247. # support caching so we'll use it for all http:// URLs.
  248. # If caching is disabled, we will also use it for
  249. # https:// hosts that we've marked as ignoring
  250. # TLS errors for (trusted-hosts).
  251. insecure_adapter = InsecureHTTPAdapter(max_retries=retries)
  252. # We want to _only_ cache responses on securely fetched origins or when
  253. # the host is specified as trusted. We do this because
  254. # we can't validate the response of an insecurely/untrusted fetched
  255. # origin, and we don't want someone to be able to poison the cache and
  256. # require manual eviction from the cache to fix it.
  257. if cache:
  258. secure_adapter = CacheControlAdapter(
  259. cache=SafeFileCache(cache),
  260. max_retries=retries,
  261. )
  262. self._trusted_host_adapter = InsecureCacheControlAdapter(
  263. cache=SafeFileCache(cache),
  264. max_retries=retries,
  265. )
  266. else:
  267. secure_adapter = HTTPAdapter(max_retries=retries)
  268. self._trusted_host_adapter = insecure_adapter
  269. self.mount("https://", secure_adapter)
  270. self.mount("http://", insecure_adapter)
  271. # Enable file:// urls
  272. self.mount("file://", LocalFSAdapter())
  273. for host in trusted_hosts:
  274. self.add_trusted_host(host, suppress_logging=True)
  275. def update_index_urls(self, new_index_urls):
  276. # type: (List[str]) -> None
  277. """
  278. :param new_index_urls: New index urls to update the authentication
  279. handler with.
  280. """
  281. self.auth.index_urls = new_index_urls
  282. def add_trusted_host(self, host, source=None, suppress_logging=False):
  283. # type: (str, Optional[str], bool) -> None
  284. """
  285. :param host: It is okay to provide a host that has previously been
  286. added.
  287. :param source: An optional source string, for logging where the host
  288. string came from.
  289. """
  290. if not suppress_logging:
  291. msg = f'adding trusted host: {host!r}'
  292. if source is not None:
  293. msg += f' (from {source})'
  294. logger.info(msg)
  295. host_port = parse_netloc(host)
  296. if host_port not in self.pip_trusted_origins:
  297. self.pip_trusted_origins.append(host_port)
  298. self.mount(
  299. build_url_from_netloc(host) + '/',
  300. self._trusted_host_adapter
  301. )
  302. if not host_port[1]:
  303. # Mount wildcard ports for the same host.
  304. self.mount(
  305. build_url_from_netloc(host) + ':',
  306. self._trusted_host_adapter
  307. )
  308. def iter_secure_origins(self):
  309. # type: () -> Iterator[SecureOrigin]
  310. yield from SECURE_ORIGINS
  311. for host, port in self.pip_trusted_origins:
  312. yield ('*', host, '*' if port is None else port)
  313. def is_secure_origin(self, location):
  314. # type: (Link) -> bool
  315. # Determine if this url used a secure transport mechanism
  316. parsed = urllib.parse.urlparse(str(location))
  317. origin_protocol, origin_host, origin_port = (
  318. parsed.scheme, parsed.hostname, parsed.port,
  319. )
  320. # The protocol to use to see if the protocol matches.
  321. # Don't count the repository type as part of the protocol: in
  322. # cases such as "git+ssh", only use "ssh". (I.e., Only verify against
  323. # the last scheme.)
  324. origin_protocol = origin_protocol.rsplit('+', 1)[-1]
  325. # Determine if our origin is a secure origin by looking through our
  326. # hardcoded list of secure origins, as well as any additional ones
  327. # configured on this PackageFinder instance.
  328. for secure_origin in self.iter_secure_origins():
  329. secure_protocol, secure_host, secure_port = secure_origin
  330. if origin_protocol != secure_protocol and secure_protocol != "*":
  331. continue
  332. try:
  333. addr = ipaddress.ip_address(origin_host)
  334. network = ipaddress.ip_network(secure_host)
  335. except ValueError:
  336. # We don't have both a valid address or a valid network, so
  337. # we'll check this origin against hostnames.
  338. if (
  339. origin_host and
  340. origin_host.lower() != secure_host.lower() and
  341. secure_host != "*"
  342. ):
  343. continue
  344. else:
  345. # We have a valid address and network, so see if the address
  346. # is contained within the network.
  347. if addr not in network:
  348. continue
  349. # Check to see if the port matches.
  350. if (
  351. origin_port != secure_port and
  352. secure_port != "*" and
  353. secure_port is not None
  354. ):
  355. continue
  356. # If we've gotten here, then this origin matches the current
  357. # secure origin and we should return True
  358. return True
  359. # If we've gotten to this point, then the origin isn't secure and we
  360. # will not accept it as a valid location to search. We will however
  361. # log a warning that we are ignoring it.
  362. logger.warning(
  363. "The repository located at %s is not a trusted or secure host and "
  364. "is being ignored. If this repository is available via HTTPS we "
  365. "recommend you use HTTPS instead, otherwise you may silence "
  366. "this warning and allow it anyway with '--trusted-host %s'.",
  367. origin_host,
  368. origin_host,
  369. )
  370. return False
  371. def request(self, method, url, *args, **kwargs):
  372. # type: (str, str, *Any, **Any) -> Response
  373. # Allow setting a default timeout on a session
  374. kwargs.setdefault("timeout", self.timeout)
  375. # Dispatch the actual request
  376. return super().request(method, url, *args, **kwargs)