Change venv

Ambulance Clerc
2023-05-31 08:31:22 +02:00
parent fb6f579089
commit fdbb52c96f
466 changed files with 25899 additions and 64721 deletions

View File

@@ -1,11 +1,18 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
"""CacheControl import Interface.
Make it easy to import from cachecontrol without long namespaces.
"""
__author__ = "Eric Larson"
__email__ = "eric@ionrock.org"
__version__ = "0.12.6"
__version__ = "0.12.11"
from .wrapper import CacheControl
from .adapter import CacheControlAdapter
from .controller import CacheController
import logging
logging.getLogger(__name__).addHandler(logging.NullHandler())
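
The wrapper re-exported above is the library's main entry point. A minimal usage sketch, assuming the vendored pip namespace shown in these imports (outside pip the package is simply "cachecontrol"):

from pip._vendor import requests
from pip._vendor.cachecontrol import CacheControl

# Wrap a plain session; responses go into an in-memory DictCache by
# default, so repeated GETs can be answered without hitting the network.
sess = CacheControl(requests.Session())
resp = sess.get("https://example.com/")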

View File

@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
import logging
from pip._vendor import requests

View File

@@ -1,16 +1,20 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
import types
import functools
import zlib
from pip._vendor.requests.adapters import HTTPAdapter
from .controller import CacheController
from .controller import CacheController, PERMANENT_REDIRECT_STATUSES
from .cache import DictCache
from .filewrapper import CallbackFileWrapper
class CacheControlAdapter(HTTPAdapter):
invalidating_methods = {"PUT", "DELETE"}
invalidating_methods = {"PUT", "PATCH", "DELETE"}
def __init__(
self,
@@ -93,7 +97,7 @@ class CacheControlAdapter(HTTPAdapter):
response = cached_response
# We always cache the 301 responses
elif response.status == 301:
elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
self.controller.cache_response(request, response)
else:
# Wrap the response file with a wrapper that will cache the

View File

@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
"""
The cache object API for implementing caches. The default is a thread
safe in-memory dictionary.
@@ -10,7 +14,7 @@ class BaseCache(object):
def get(self, key):
raise NotImplementedError()
def set(self, key, value):
def set(self, key, value, expires=None):
raise NotImplementedError()
def delete(self, key):
@@ -29,7 +33,7 @@ class DictCache(BaseCache):
def get(self, key):
return self.data.get(key, None)
def set(self, key, value):
def set(self, key, value, expires=None):
with self.lock:
self.data.update({key: value})
@@ -37,3 +41,25 @@ class DictCache(BaseCache):
with self.lock:
if key in self.data:
self.data.pop(key)
class SeparateBodyBaseCache(BaseCache):
"""
In this variant, the body is not stored mixed in with the metadata, but is
passed in (as a bytes-like object) in a separate call to ``set_body()``.
That is, the expected interaction pattern is::
cache.set(key, serialized_metadata)
cache.set_body(key, body)
Similarly, the body should be loaded separately via ``get_body()``.
"""
def set_body(self, key, body):
raise NotImplementedError()
def get_body(self, key):
"""
Return the body as file-like object.
"""
raise NotImplementedError()
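
A minimal sketch of the two-call protocol this base class defines, using a hypothetical in-memory subclass (the class name and both dicts are illustrative, not part of cachecontrol):

from io import BytesIO
from pip._vendor.cachecontrol.cache import SeparateBodyBaseCache

class InMemorySeparateBodyCache(SeparateBodyBaseCache):
    # Hypothetical example subclass: metadata and bodies live in two dicts.
    def __init__(self):
        self.metadata = {}
        self.bodies = {}

    def set(self, key, value, expires=None):
        self.metadata[key] = value

    def get(self, key):
        return self.metadata.get(key)

    def set_body(self, key, body):
        self.bodies[key] = bytes(body)

    def get_body(self, key):
        # The contract is to return the body as a file-like object.
        data = self.bodies.get(key)
        return BytesIO(data) if data is not None else None

    def delete(self, key):
        self.metadata.pop(key, None)
        self.bodies.pop(key, None)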

View File

@@ -1,2 +1,9 @@
from .file_cache import FileCache # noqa
from .redis_cache import RedisCache # noqa
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from .file_cache import FileCache, SeparateBodyFileCache
from .redis_cache import RedisCache
__all__ = ["FileCache", "SeparateBodyFileCache", "RedisCache"]

View File

@@ -1,8 +1,12 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
import hashlib
import os
from textwrap import dedent
from ..cache import BaseCache
from ..cache import BaseCache, SeparateBodyBaseCache
from ..controller import CacheController
try:
@@ -53,7 +57,8 @@ def _secure_open_write(filename, fmode):
raise
class FileCache(BaseCache):
class _FileCacheMixin:
"""Shared implementation for both FileCache variants."""
def __init__(
self,
@@ -114,22 +119,27 @@ class FileCache(BaseCache):
except FileNotFoundError:
return None
def set(self, key, value):
def set(self, key, value, expires=None):
name = self._fn(key)
self._write(name, value)
def _write(self, path, data: bytes):
"""
Safely write the data to the given path.
"""
# Make sure the directory exists
try:
os.makedirs(os.path.dirname(name), self.dirmode)
os.makedirs(os.path.dirname(path), self.dirmode)
except (IOError, OSError):
pass
with self.lock_class(name) as lock:
with self.lock_class(path) as lock:
# Write our actual file
with _secure_open_write(lock.path, self.filemode) as fh:
fh.write(value)
fh.write(data)
def delete(self, key):
name = self._fn(key)
def _delete(self, key, suffix):
name = self._fn(key) + suffix
if not self.forever:
try:
os.remove(name)
@@ -137,6 +147,38 @@ class FileCache(BaseCache):
pass
class FileCache(_FileCacheMixin, BaseCache):
"""
Traditional FileCache: body is stored in memory, so not suitable for large
downloads.
"""
def delete(self, key):
self._delete(key, "")
class SeparateBodyFileCache(_FileCacheMixin, SeparateBodyBaseCache):
"""
Memory-efficient FileCache: body is stored in a separate file, reducing
peak memory usage.
"""
def get_body(self, key):
name = self._fn(key) + ".body"
try:
return open(name, "rb")
except FileNotFoundError:
return None
def set_body(self, key, body):
name = self._fn(key) + ".body"
self._write(name, body)
def delete(self, key):
self._delete(key, "")
self._delete(key, ".body")
def url_to_file_path(url, filecache):
"""Return the file cache path based on the URL.

View File

@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import division
from datetime import datetime
@@ -15,9 +19,11 @@ class RedisCache(BaseCache):
def set(self, key, value, expires=None):
if not expires:
self.conn.set(key, value)
else:
elif isinstance(expires, datetime):
expires = expires - datetime.utcnow()
self.conn.setex(key, int(expires.total_seconds()), value)
else:
self.conn.setex(key, expires, value)
def delete(self, key):
self.conn.delete(key)
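
A hedged sketch of the new expires handling above, assuming a Redis server on localhost and the third-party redis client (not part of pip's vendor tree):

from datetime import datetime, timedelta
import redis  # assumption: installed separately

from pip._vendor.cachecontrol.caches import RedisCache

cache = RedisCache(redis.Redis())
cache.set("k1", b"v1")                                                   # no TTL: plain SET
cache.set("k2", b"v2", expires=datetime.utcnow() + timedelta(hours=1))   # datetime -> SETEX
cache.set("k3", b"v3", expires=3600)                                     # int seconds -> SETEX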

View File

@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
try:
from urllib.parse import urljoin
except ImportError:
@@ -9,7 +13,6 @@ try:
except ImportError:
import pickle
# Handle the case where the requests module has been patched to not have
# urllib3 bundled as part of its source.
try:

View File

@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
"""
The httplib2 algorithms ported for use with requests.
"""
@@ -9,7 +13,7 @@ from email.utils import parsedate_tz
from pip._vendor.requests.structures import CaseInsensitiveDict
from .cache import DictCache
from .cache import DictCache, SeparateBodyBaseCache
from .serialize import Serializer
@@ -17,19 +21,20 @@ logger = logging.getLogger(__name__)
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
PERMANENT_REDIRECT_STATUSES = (301, 308)
def parse_uri(uri):
"""Parses a URI using the regex given in Appendix B of RFC 3986.
(scheme, authority, path, query, fragment) = parse_uri(uri)
(scheme, authority, path, query, fragment) = parse_uri(uri)
"""
groups = URI.match(uri).groups()
return (groups[1], groups[3], groups[4], groups[6], groups[8])
class CacheController(object):
"""An interface to see if request should cached or not.
"""
"""An interface to see if request should cached or not."""
def __init__(
self, cache=None, cache_etags=True, serializer=None, status_codes=None
@@ -37,7 +42,7 @@ class CacheController(object):
self.cache = DictCache() if cache is None else cache
self.cache_etags = cache_etags
self.serializer = serializer or Serializer()
self.cacheable_status_codes = status_codes or (200, 203, 300, 301)
self.cacheable_status_codes = status_codes or (200, 203, 300, 301, 308)
@classmethod
def _urlnorm(cls, uri):
@@ -141,23 +146,29 @@ class CacheController(object):
logger.debug("No cache entry available")
return False
if isinstance(self.cache, SeparateBodyBaseCache):
body_file = self.cache.get_body(cache_url)
else:
body_file = None
# Check whether it can be deserialized
resp = self.serializer.loads(request, cache_data)
resp = self.serializer.loads(request, cache_data, body_file)
if not resp:
logger.warning("Cache entry deserialization failed, entry ignored")
return False
# If we have a cached 301, return it immediately. We don't
# need to test our response for other headers b/c it is
# If we have a cached permanent redirect, return it immediately. We
# don't need to test our response for other headers b/c it is
# intrinsically "cacheable" as it is Permanent.
#
# See:
# https://tools.ietf.org/html/rfc7231#section-6.4.2
#
# Client can try to refresh the value by repeating the request
# with cache busting headers as usual (ie no-cache).
if resp.status == 301:
if int(resp.status) in PERMANENT_REDIRECT_STATUSES:
msg = (
'Returning cached "301 Moved Permanently" response '
"Returning cached permanent redirect response "
"(ignoring date and etag information)"
)
logger.debug(msg)
@@ -244,6 +255,26 @@ class CacheController(object):
return new_headers
def _cache_set(self, cache_url, request, response, body=None, expires_time=None):
"""
Store the data in the cache.
"""
if isinstance(self.cache, SeparateBodyBaseCache):
# We pass in the body separately; just put a placeholder empty
# string in the metadata.
self.cache.set(
cache_url,
self.serializer.dumps(request, response, b""),
expires=expires_time,
)
self.cache.set_body(cache_url, body)
else:
self.cache.set(
cache_url,
self.serializer.dumps(request, response, body),
expires=expires_time,
)
def cache_response(self, request, response, body=None, status_codes=None):
"""
Algorithm for caching requests.
@@ -261,6 +292,11 @@ class CacheController(object):
response_headers = CaseInsensitiveDict(response.headers)
if "date" in response_headers:
date = calendar.timegm(parsedate_tz(response_headers["date"]))
else:
date = 0
# If we've been given a body, our response has a Content-Length, and that
# Content-Length is valid, then we can check to see if the body we've
# been given matches the expected size, and if it doesn't we'll just
@@ -304,35 +340,62 @@ class CacheController(object):
# If we've been given an etag, then keep the response
if self.cache_etags and "etag" in response_headers:
logger.debug("Caching due to etag")
self.cache.set(
cache_url, self.serializer.dumps(request, response, body=body)
)
expires_time = 0
if response_headers.get("expires"):
expires = parsedate_tz(response_headers["expires"])
if expires is not None:
expires_time = calendar.timegm(expires) - date
# Add to the cache any 301s. We do this before looking at
# the Date headers.
elif response.status == 301:
logger.debug("Caching permanant redirect")
self.cache.set(cache_url, self.serializer.dumps(request, response))
expires_time = max(expires_time, 14 * 86400)
logger.debug("etag object cached for {0} seconds".format(expires_time))
logger.debug("Caching due to etag")
self._cache_set(cache_url, request, response, body, expires_time)
# Add to the cache any permanent redirects. We do this before looking
# at the Date headers.
elif int(response.status) in PERMANENT_REDIRECT_STATUSES:
logger.debug("Caching permanent redirect")
self._cache_set(cache_url, request, response, b"")
# Add to the cache if the response headers demand it. If there
# is no date header then we can't do anything about expiring
# the cache.
elif "date" in response_headers:
date = calendar.timegm(parsedate_tz(response_headers["date"]))
# cache when there is a max-age > 0
if "max-age" in cc and cc["max-age"] > 0:
logger.debug("Caching b/c date exists and max-age > 0")
self.cache.set(
cache_url, self.serializer.dumps(request, response, body=body)
expires_time = cc["max-age"]
self._cache_set(
cache_url,
request,
response,
body,
expires_time,
)
# If the request can expire, it means we should cache it
# in the meantime.
elif "expires" in response_headers:
if response_headers["expires"]:
logger.debug("Caching b/c of expires header")
self.cache.set(
cache_url, self.serializer.dumps(request, response, body=body)
expires = parsedate_tz(response_headers["expires"])
if expires is not None:
expires_time = calendar.timegm(expires) - date
else:
expires_time = None
logger.debug(
"Caching b/c of expires header. expires in {0} seconds".format(
expires_time
)
)
self._cache_set(
cache_url,
request,
response,
body,
expires_time,
)
def update_cached_response(self, request, response):
@@ -371,6 +434,6 @@ class CacheController(object):
cached_response.status = 200
# update our cache
self.cache.set(cache_url, self.serializer.dumps(request, cached_response))
self._cache_set(cache_url, request, cached_response)
return cached_response
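
A worked example of the expires_time arithmetic the controller now performs in both the etag and Expires branches (the header values are illustrative):

import calendar
from email.utils import parsedate_tz

# Freshness lifetime = Expires timestamp minus Date timestamp, in seconds.
date = calendar.timegm(parsedate_tz("Tue, 30 May 2023 08:00:00 GMT"))
expires = parsedate_tz("Tue, 30 May 2023 09:00:00 GMT")
expires_time = calendar.timegm(expires) - date
assert expires_time == 3600  # the entry may be stored with a one-hour TTL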

View File

@@ -1,4 +1,9 @@
from io import BytesIO
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from tempfile import NamedTemporaryFile
import mmap
class CallbackFileWrapper(object):
@@ -11,10 +16,17 @@ class CallbackFileWrapper(object):
This class uses members with a double underscore (__) leading prefix so as
not to accidentally shadow an attribute.
The data is stored in a temporary file until it is all available. As long
as the temporary files directory is disk-based (sometimes it's a
memory-backed-``tmpfs`` on Linux), data will be unloaded to disk if memory
pressure is high. For small files the disk usually won't be used at all,
it'll all be in the filesystem memory cache, so there should be no
performance impact.
"""
def __init__(self, fp, callback):
self.__buf = BytesIO()
self.__buf = NamedTemporaryFile("rb+", delete=True)
self.__fp = fp
self.__callback = callback
@@ -49,7 +61,19 @@ class CallbackFileWrapper(object):
def _close(self):
if self.__callback:
self.__callback(self.__buf.getvalue())
if self.__buf.tell() == 0:
# Empty file:
result = b""
else:
# Return the data without actually loading it into memory,
# relying on Python's buffer API and mmap(). mmap() just gives
# a view directly into the filesystem's memory cache, so it
# doesn't result in duplicate memory use.
self.__buf.seek(0, 0)
result = memoryview(
mmap.mmap(self.__buf.fileno(), 0, access=mmap.ACCESS_READ)
)
self.__callback(result)
# We assign this to None here, because otherwise we can get into
# really tricky problems where the CPython interpreter dead locks
@@ -58,9 +82,16 @@ class CallbackFileWrapper(object):
# and allows the garbage collector to do it's thing normally.
self.__callback = None
# Closing the temporary file releases memory and frees disk space.
# Important when caching big files.
self.__buf.close()
def read(self, amt=None):
data = self.__fp.read(amt)
self.__buf.write(data)
if data:
# We may be dealing with b'', a sign that things are over:
# it's passed e.g. after we've already closed self.__buf.
self.__buf.write(data)
if self.__is_fp_closed():
self._close()
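
A standalone sketch of the mmap trick that _close() relies on, using a throwaway temporary file (the contents are illustrative):

import mmap
from tempfile import NamedTemporaryFile

buf = NamedTemporaryFile("rb+", delete=True)
buf.write(b"cached response body")
buf.seek(0, 0)  # seeking flushes the buffered writes to the OS file

# mmap exposes the kernel's page cache directly, so wrapping it in a
# memoryview hands the callback the bytes without a second in-memory copy.
view = memoryview(mmap.mmap(buf.fileno(), 0, access=mmap.ACCESS_READ))
assert bytes(view[:6]) == b"cached"
view.release()  # release the mapping before closing the backing file
buf.close()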

View File

@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
import calendar
import time

View File

@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
import base64
import io
import json
@@ -17,24 +21,18 @@ def _b64_decode_str(s):
return _b64_decode_bytes(s).decode("utf8")
class Serializer(object):
_default_body_read = object()
class Serializer(object):
def dumps(self, request, response, body=None):
response_headers = CaseInsensitiveDict(response.headers)
if body is None:
# When a body isn't passed in, we'll read the response. We
# also update the response with a new file handler to be
# sure it acts as though it was never read.
body = response.read(decode_content=False)
# NOTE: 99% sure this is dead code. I'm only leaving it
# here b/c I don't have a test yet to prove
# it. Basically, before using
# `cachecontrol.filewrapper.CallbackFileWrapper`,
# this made an effort to reset the file handle. The
# `CallbackFileWrapper` short circuits this code by
# setting the body as the content is consumed, the
# result being a `body` argument is *always* passed
# into cache_response, and in turn,
# `Serializer.dump`.
response._fp = io.BytesIO(body)
# NOTE: This is all a bit weird, but it's really important that on
@@ -46,7 +44,7 @@ class Serializer(object):
# enough to have msgpack know the difference.
data = {
u"response": {
u"body": body,
u"body": body, # Empty bytestring if body is stored separately
u"headers": dict(
(text_type(k), text_type(v)) for k, v in response.headers.items()
),
@@ -71,7 +69,7 @@ class Serializer(object):
return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)])
def loads(self, request, data):
def loads(self, request, data, body_file=None):
# Short circuit if we've been given an empty set of data
if not data:
return
@@ -94,14 +92,14 @@ class Serializer(object):
# Dispatch to the actual load method for the given version
try:
return getattr(self, "_loads_v{}".format(ver))(request, data)
return getattr(self, "_loads_v{}".format(ver))(request, data, body_file)
except AttributeError:
# This is a version we don't have a loads function for, so we'll
# just treat it as a miss and return None
return
def prepare_response(self, request, cached):
def prepare_response(self, request, cached, body_file=None):
"""Verify our vary headers match and construct a real urllib3
HTTPResponse object.
"""
@@ -127,7 +125,10 @@ class Serializer(object):
cached["response"]["headers"] = headers
try:
body = io.BytesIO(body_raw)
if body_file is None:
body = io.BytesIO(body_raw)
else:
body = body_file
except TypeError:
# This can happen if cachecontrol serialized to v1 format (pickle)
# using Python 2. A Python 2 str(byte string) will be unpickled as
@@ -139,21 +140,22 @@ class Serializer(object):
return HTTPResponse(body=body, preload_content=False, **cached["response"])
def _loads_v0(self, request, data):
def _loads_v0(self, request, data, body_file=None):
# The original legacy cache data. This doesn't contain enough
# information to construct everything we need, so we'll treat this as
# a miss.
return
def _loads_v1(self, request, data):
def _loads_v1(self, request, data, body_file=None):
try:
cached = pickle.loads(data)
except ValueError:
return
return self.prepare_response(request, cached)
return self.prepare_response(request, cached, body_file)
def _loads_v2(self, request, data):
def _loads_v2(self, request, data, body_file=None):
assert body_file is None
try:
cached = json.loads(zlib.decompress(data).decode("utf8"))
except (ValueError, zlib.error):
@@ -171,18 +173,18 @@ class Serializer(object):
for k, v in cached["vary"].items()
)
return self.prepare_response(request, cached)
return self.prepare_response(request, cached, body_file)
def _loads_v3(self, request, data):
def _loads_v3(self, request, data, body_file):
# Due to Python 2 encoding issues, it's impossible to know for sure
# exactly how to load v3 entries, thus we'll treat these as a miss so
# that they get rewritten out as v4 entries.
return
def _loads_v4(self, request, data):
def _loads_v4(self, request, data, body_file=None):
try:
cached = msgpack.loads(data, raw=False)
except ValueError:
return
return self.prepare_response(request, cached)
return self.prepare_response(request, cached, body_file)
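
For orientation, a sketch of the framing that loads() dispatches on: a version prefix, one comma, then the payload (the payload bytes here are a stand-in, not real msgpack data):

data = b"cc=4,<msgpack payload>"
ver, _, payload = data.partition(b",")
assert ver == b"cc=4"
# Serializer.loads() maps "cc=4" to _loads_v4(request, payload, body_file);
# unknown versions fall through and are treated as cache misses.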

View File

@@ -1,3 +1,7 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from .adapter import CacheControlAdapter
from .cache import DictCache