Ajoutez des fichiers projet.
This commit is contained in:
111
venv/Lib/site-packages/pip/_vendor/__init__.py
Normal file
111
venv/Lib/site-packages/pip/_vendor/__init__.py
Normal file
@@ -0,0 +1,111 @@
|
||||
"""
|
||||
pip._vendor is for vendoring dependencies of pip to prevent needing pip to
|
||||
depend on something external.
|
||||
|
||||
Files inside of pip._vendor should be considered immutable and should only be
|
||||
updated to versions from upstream.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import glob
|
||||
import os.path
|
||||
import sys
|
||||
|
||||
# Downstream redistributors which have debundled our dependencies should also
|
||||
# patch this value to be true. This will trigger the additional patching
|
||||
# to cause things like "six" to be available as pip.
|
||||
DEBUNDLED = False
|
||||
|
||||
# By default, look in this directory for a bunch of .whl files which we will
|
||||
# add to the beginning of sys.path before attempting to import anything. This
|
||||
# is done to support downstream re-distributors like Debian and Fedora who
|
||||
# wish to create their own Wheels for our dependencies to aid in debundling.
|
||||
WHEEL_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||
|
||||
|
||||
# Define a small helper function to alias our vendored modules to the real ones
|
||||
# if the vendored ones do not exist. This idea of this was taken from
|
||||
# https://github.com/kennethreitz/requests/pull/2567.
|
||||
def vendored(modulename):
|
||||
vendored_name = "{0}.{1}".format(__name__, modulename)
|
||||
|
||||
try:
|
||||
__import__(modulename, globals(), locals(), level=0)
|
||||
except ImportError:
|
||||
# We can just silently allow import failures to pass here. If we
|
||||
# got to this point it means that ``import pip._vendor.whatever``
|
||||
# failed and so did ``import whatever``. Since we're importing this
|
||||
# upfront in an attempt to alias imports, not erroring here will
|
||||
# just mean we get a regular import error whenever pip *actually*
|
||||
# tries to import one of these modules to use it, which actually
|
||||
# gives us a better error message than we would have otherwise
|
||||
# gotten.
|
||||
pass
|
||||
else:
|
||||
sys.modules[vendored_name] = sys.modules[modulename]
|
||||
base, head = vendored_name.rsplit(".", 1)
|
||||
setattr(sys.modules[base], head, sys.modules[modulename])
|
||||
|
||||
|
||||
# If we're operating in a debundled setup, then we want to go ahead and trigger
|
||||
# the aliasing of our vendored libraries as well as looking for wheels to add
|
||||
# to our sys.path. This will cause all of this code to be a no-op typically
|
||||
# however downstream redistributors can enable it in a consistent way across
|
||||
# all platforms.
|
||||
if DEBUNDLED:
|
||||
# Actually look inside of WHEEL_DIR to find .whl files and add them to the
|
||||
# front of our sys.path.
|
||||
sys.path[:] = glob.glob(os.path.join(WHEEL_DIR, "*.whl")) + sys.path
|
||||
|
||||
# Actually alias all of our vendored dependencies.
|
||||
vendored("appdirs")
|
||||
vendored("cachecontrol")
|
||||
vendored("certifi")
|
||||
vendored("colorama")
|
||||
vendored("distlib")
|
||||
vendored("distro")
|
||||
vendored("html5lib")
|
||||
vendored("six")
|
||||
vendored("six.moves")
|
||||
vendored("six.moves.urllib")
|
||||
vendored("six.moves.urllib.parse")
|
||||
vendored("packaging")
|
||||
vendored("packaging.version")
|
||||
vendored("packaging.specifiers")
|
||||
vendored("pep517")
|
||||
vendored("pkg_resources")
|
||||
vendored("progress")
|
||||
vendored("requests")
|
||||
vendored("requests.exceptions")
|
||||
vendored("requests.packages")
|
||||
vendored("requests.packages.urllib3")
|
||||
vendored("requests.packages.urllib3._collections")
|
||||
vendored("requests.packages.urllib3.connection")
|
||||
vendored("requests.packages.urllib3.connectionpool")
|
||||
vendored("requests.packages.urllib3.contrib")
|
||||
vendored("requests.packages.urllib3.contrib.ntlmpool")
|
||||
vendored("requests.packages.urllib3.contrib.pyopenssl")
|
||||
vendored("requests.packages.urllib3.exceptions")
|
||||
vendored("requests.packages.urllib3.fields")
|
||||
vendored("requests.packages.urllib3.filepost")
|
||||
vendored("requests.packages.urllib3.packages")
|
||||
vendored("requests.packages.urllib3.packages.ordered_dict")
|
||||
vendored("requests.packages.urllib3.packages.six")
|
||||
vendored("requests.packages.urllib3.packages.ssl_match_hostname")
|
||||
vendored("requests.packages.urllib3.packages.ssl_match_hostname."
|
||||
"_implementation")
|
||||
vendored("requests.packages.urllib3.poolmanager")
|
||||
vendored("requests.packages.urllib3.request")
|
||||
vendored("requests.packages.urllib3.response")
|
||||
vendored("requests.packages.urllib3.util")
|
||||
vendored("requests.packages.urllib3.util.connection")
|
||||
vendored("requests.packages.urllib3.util.request")
|
||||
vendored("requests.packages.urllib3.util.response")
|
||||
vendored("requests.packages.urllib3.util.retry")
|
||||
vendored("requests.packages.urllib3.util.ssl_")
|
||||
vendored("requests.packages.urllib3.util.timeout")
|
||||
vendored("requests.packages.urllib3.util.url")
|
||||
vendored("resolvelib")
|
||||
vendored("tenacity")
|
||||
vendored("tomli")
|
||||
vendored("urllib3")
|
||||
633
venv/Lib/site-packages/pip/_vendor/appdirs.py
Normal file
633
venv/Lib/site-packages/pip/_vendor/appdirs.py
Normal file
@@ -0,0 +1,633 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2005-2010 ActiveState Software Inc.
|
||||
# Copyright (c) 2013 Eddy Petrișor
|
||||
|
||||
"""Utilities for determining application-specific dirs.
|
||||
|
||||
See <http://github.com/ActiveState/appdirs> for details and usage.
|
||||
"""
|
||||
# Dev Notes:
|
||||
# - MSDN on where to store app data files:
|
||||
# http://support.microsoft.com/default.aspx?scid=kb;en-us;310294#XSLTH3194121123120121120120
|
||||
# - Mac OS X: http://developer.apple.com/documentation/MacOSX/Conceptual/BPFileSystem/index.html
|
||||
# - XDG spec for Un*x: http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
|
||||
|
||||
__version__ = "1.4.4"
|
||||
__version_info__ = tuple(int(segment) for segment in __version__.split("."))
|
||||
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
PY3 = sys.version_info[0] == 3
|
||||
|
||||
if PY3:
|
||||
unicode = str
|
||||
|
||||
if sys.platform.startswith('java'):
|
||||
import platform
|
||||
os_name = platform.java_ver()[3][0]
|
||||
if os_name.startswith('Windows'): # "Windows XP", "Windows 7", etc.
|
||||
system = 'win32'
|
||||
elif os_name.startswith('Mac'): # "Mac OS X", etc.
|
||||
system = 'darwin'
|
||||
else: # "Linux", "SunOS", "FreeBSD", etc.
|
||||
# Setting this to "linux2" is not ideal, but only Windows or Mac
|
||||
# are actually checked for and the rest of the module expects
|
||||
# *sys.platform* style strings.
|
||||
system = 'linux2'
|
||||
elif sys.platform == 'cli' and os.name == 'nt':
|
||||
# Detect Windows in IronPython to match pip._internal.utils.compat.WINDOWS
|
||||
# Discussion: <https://github.com/pypa/pip/pull/7501>
|
||||
system = 'win32'
|
||||
else:
|
||||
system = sys.platform
|
||||
|
||||
|
||||
|
||||
def user_data_dir(appname=None, appauthor=None, version=None, roaming=False):
|
||||
r"""Return full path to the user-specific data dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"roaming" (boolean, default False) can be set True to use the Windows
|
||||
roaming appdata directory. That means that for users on a Windows
|
||||
network setup for roaming profiles, this user data will be
|
||||
sync'd on login. See
|
||||
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
|
||||
for a discussion of issues.
|
||||
|
||||
Typical user data directories are:
|
||||
Mac OS X: ~/Library/Application Support/<AppName> # or ~/.config/<AppName>, if the other does not exist
|
||||
Unix: ~/.local/share/<AppName> # or in $XDG_DATA_HOME, if defined
|
||||
Win XP (not roaming): C:\Documents and Settings\<username>\Application Data\<AppAuthor>\<AppName>
|
||||
Win XP (roaming): C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>
|
||||
Win 7 (not roaming): C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>
|
||||
Win 7 (roaming): C:\Users\<username>\AppData\Roaming\<AppAuthor>\<AppName>
|
||||
|
||||
For Unix, we follow the XDG spec and support $XDG_DATA_HOME.
|
||||
That means, by default "~/.local/share/<AppName>".
|
||||
"""
|
||||
if system == "win32":
|
||||
if appauthor is None:
|
||||
appauthor = appname
|
||||
const = roaming and "CSIDL_APPDATA" or "CSIDL_LOCAL_APPDATA"
|
||||
path = os.path.normpath(_get_win_folder(const))
|
||||
if appname:
|
||||
if appauthor is not False:
|
||||
path = os.path.join(path, appauthor, appname)
|
||||
else:
|
||||
path = os.path.join(path, appname)
|
||||
elif system == 'darwin':
|
||||
path = os.path.expanduser('~/Library/Application Support/')
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
else:
|
||||
path = os.getenv('XDG_DATA_HOME', os.path.expanduser("~/.local/share"))
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
def site_data_dir(appname=None, appauthor=None, version=None, multipath=False):
|
||||
r"""Return full path to the user-shared data dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"multipath" is an optional parameter only applicable to *nix
|
||||
which indicates that the entire list of data dirs should be
|
||||
returned. By default, the first item from XDG_DATA_DIRS is
|
||||
returned, or '/usr/local/share/<AppName>',
|
||||
if XDG_DATA_DIRS is not set
|
||||
|
||||
Typical site data directories are:
|
||||
Mac OS X: /Library/Application Support/<AppName>
|
||||
Unix: /usr/local/share/<AppName> or /usr/share/<AppName>
|
||||
Win XP: C:\Documents and Settings\All Users\Application Data\<AppAuthor>\<AppName>
|
||||
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
|
||||
Win 7: C:\ProgramData\<AppAuthor>\<AppName> # Hidden, but writeable on Win 7.
|
||||
|
||||
For Unix, this is using the $XDG_DATA_DIRS[0] default.
|
||||
|
||||
WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
|
||||
"""
|
||||
if system == "win32":
|
||||
if appauthor is None:
|
||||
appauthor = appname
|
||||
path = os.path.normpath(_get_win_folder("CSIDL_COMMON_APPDATA"))
|
||||
if appname:
|
||||
if appauthor is not False:
|
||||
path = os.path.join(path, appauthor, appname)
|
||||
else:
|
||||
path = os.path.join(path, appname)
|
||||
elif system == 'darwin':
|
||||
path = os.path.expanduser('/Library/Application Support')
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
else:
|
||||
# XDG default for $XDG_DATA_DIRS
|
||||
# only first, if multipath is False
|
||||
path = os.getenv('XDG_DATA_DIRS',
|
||||
os.pathsep.join(['/usr/local/share', '/usr/share']))
|
||||
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep)]
|
||||
if appname:
|
||||
if version:
|
||||
appname = os.path.join(appname, version)
|
||||
pathlist = [os.path.join(x, appname) for x in pathlist]
|
||||
|
||||
if multipath:
|
||||
path = os.pathsep.join(pathlist)
|
||||
else:
|
||||
path = pathlist[0]
|
||||
return path
|
||||
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
def user_config_dir(appname=None, appauthor=None, version=None, roaming=False):
|
||||
r"""Return full path to the user-specific config dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"roaming" (boolean, default False) can be set True to use the Windows
|
||||
roaming appdata directory. That means that for users on a Windows
|
||||
network setup for roaming profiles, this user data will be
|
||||
sync'd on login. See
|
||||
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
|
||||
for a discussion of issues.
|
||||
|
||||
Typical user config directories are:
|
||||
Mac OS X: same as user_data_dir
|
||||
Unix: ~/.config/<AppName> # or in $XDG_CONFIG_HOME, if defined
|
||||
Win *: same as user_data_dir
|
||||
|
||||
For Unix, we follow the XDG spec and support $XDG_CONFIG_HOME.
|
||||
That means, by default "~/.config/<AppName>".
|
||||
"""
|
||||
if system in ["win32", "darwin"]:
|
||||
path = user_data_dir(appname, appauthor, None, roaming)
|
||||
else:
|
||||
path = os.getenv('XDG_CONFIG_HOME', os.path.expanduser("~/.config"))
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
# for the discussion regarding site_config_dir locations
|
||||
# see <https://github.com/pypa/pip/issues/1733>
|
||||
def site_config_dir(appname=None, appauthor=None, version=None, multipath=False):
|
||||
r"""Return full path to the user-shared data dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"multipath" is an optional parameter only applicable to *nix
|
||||
which indicates that the entire list of config dirs should be
|
||||
returned. By default, the first item from XDG_CONFIG_DIRS is
|
||||
returned, or '/etc/xdg/<AppName>', if XDG_CONFIG_DIRS is not set
|
||||
|
||||
Typical site config directories are:
|
||||
Mac OS X: same as site_data_dir
|
||||
Unix: /etc/xdg/<AppName> or $XDG_CONFIG_DIRS[i]/<AppName> for each value in
|
||||
$XDG_CONFIG_DIRS
|
||||
Win *: same as site_data_dir
|
||||
Vista: (Fail! "C:\ProgramData" is a hidden *system* directory on Vista.)
|
||||
|
||||
For Unix, this is using the $XDG_CONFIG_DIRS[0] default, if multipath=False
|
||||
|
||||
WARNING: Do not use this on Windows. See the Vista-Fail note above for why.
|
||||
"""
|
||||
if system in ["win32", "darwin"]:
|
||||
path = site_data_dir(appname, appauthor)
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
else:
|
||||
# XDG default for $XDG_CONFIG_DIRS (missing or empty)
|
||||
# see <https://github.com/pypa/pip/pull/7501#discussion_r360624829>
|
||||
# only first, if multipath is False
|
||||
path = os.getenv('XDG_CONFIG_DIRS') or '/etc/xdg'
|
||||
pathlist = [os.path.expanduser(x.rstrip(os.sep)) for x in path.split(os.pathsep) if x]
|
||||
if appname:
|
||||
if version:
|
||||
appname = os.path.join(appname, version)
|
||||
pathlist = [os.path.join(x, appname) for x in pathlist]
|
||||
|
||||
if multipath:
|
||||
path = os.pathsep.join(pathlist)
|
||||
else:
|
||||
path = pathlist[0]
|
||||
return path
|
||||
|
||||
|
||||
def user_cache_dir(appname=None, appauthor=None, version=None, opinion=True):
|
||||
r"""Return full path to the user-specific cache dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"opinion" (boolean) can be False to disable the appending of
|
||||
"Cache" to the base app data dir for Windows. See
|
||||
discussion below.
|
||||
|
||||
Typical user cache directories are:
|
||||
Mac OS X: ~/Library/Caches/<AppName>
|
||||
Unix: ~/.cache/<AppName> (XDG default)
|
||||
Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Cache
|
||||
Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Cache
|
||||
|
||||
On Windows the only suggestion in the MSDN docs is that local settings go in
|
||||
the `CSIDL_LOCAL_APPDATA` directory. This is identical to the non-roaming
|
||||
app data dir (the default returned by `user_data_dir` above). Apps typically
|
||||
put cache data somewhere *under* the given dir here. Some examples:
|
||||
...\Mozilla\Firefox\Profiles\<ProfileName>\Cache
|
||||
...\Acme\SuperApp\Cache\1.0
|
||||
OPINION: This function appends "Cache" to the `CSIDL_LOCAL_APPDATA` value.
|
||||
This can be disabled with the `opinion=False` option.
|
||||
"""
|
||||
if system == "win32":
|
||||
if appauthor is None:
|
||||
appauthor = appname
|
||||
path = os.path.normpath(_get_win_folder("CSIDL_LOCAL_APPDATA"))
|
||||
# When using Python 2, return paths as bytes on Windows like we do on
|
||||
# other operating systems. See helper function docs for more details.
|
||||
if not PY3 and isinstance(path, unicode):
|
||||
path = _win_path_to_bytes(path)
|
||||
if appname:
|
||||
if appauthor is not False:
|
||||
path = os.path.join(path, appauthor, appname)
|
||||
else:
|
||||
path = os.path.join(path, appname)
|
||||
if opinion:
|
||||
path = os.path.join(path, "Cache")
|
||||
elif system == 'darwin':
|
||||
path = os.path.expanduser('~/Library/Caches')
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
else:
|
||||
path = os.getenv('XDG_CACHE_HOME', os.path.expanduser('~/.cache'))
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
def user_state_dir(appname=None, appauthor=None, version=None, roaming=False):
|
||||
r"""Return full path to the user-specific state dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"roaming" (boolean, default False) can be set True to use the Windows
|
||||
roaming appdata directory. That means that for users on a Windows
|
||||
network setup for roaming profiles, this user data will be
|
||||
sync'd on login. See
|
||||
<http://technet.microsoft.com/en-us/library/cc766489(WS.10).aspx>
|
||||
for a discussion of issues.
|
||||
|
||||
Typical user state directories are:
|
||||
Mac OS X: same as user_data_dir
|
||||
Unix: ~/.local/state/<AppName> # or in $XDG_STATE_HOME, if defined
|
||||
Win *: same as user_data_dir
|
||||
|
||||
For Unix, we follow this Debian proposal <https://wiki.debian.org/XDGBaseDirectorySpecification#state>
|
||||
to extend the XDG spec and support $XDG_STATE_HOME.
|
||||
|
||||
That means, by default "~/.local/state/<AppName>".
|
||||
"""
|
||||
if system in ["win32", "darwin"]:
|
||||
path = user_data_dir(appname, appauthor, None, roaming)
|
||||
else:
|
||||
path = os.getenv('XDG_STATE_HOME', os.path.expanduser("~/.local/state"))
|
||||
if appname:
|
||||
path = os.path.join(path, appname)
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
def user_log_dir(appname=None, appauthor=None, version=None, opinion=True):
|
||||
r"""Return full path to the user-specific log dir for this application.
|
||||
|
||||
"appname" is the name of application.
|
||||
If None, just the system directory is returned.
|
||||
"appauthor" (only used on Windows) is the name of the
|
||||
appauthor or distributing body for this application. Typically
|
||||
it is the owning company name. This falls back to appname. You may
|
||||
pass False to disable it.
|
||||
"version" is an optional version path element to append to the
|
||||
path. You might want to use this if you want multiple versions
|
||||
of your app to be able to run independently. If used, this
|
||||
would typically be "<major>.<minor>".
|
||||
Only applied when appname is present.
|
||||
"opinion" (boolean) can be False to disable the appending of
|
||||
"Logs" to the base app data dir for Windows, and "log" to the
|
||||
base cache dir for Unix. See discussion below.
|
||||
|
||||
Typical user log directories are:
|
||||
Mac OS X: ~/Library/Logs/<AppName>
|
||||
Unix: ~/.cache/<AppName>/log # or under $XDG_CACHE_HOME if defined
|
||||
Win XP: C:\Documents and Settings\<username>\Local Settings\Application Data\<AppAuthor>\<AppName>\Logs
|
||||
Vista: C:\Users\<username>\AppData\Local\<AppAuthor>\<AppName>\Logs
|
||||
|
||||
On Windows the only suggestion in the MSDN docs is that local settings
|
||||
go in the `CSIDL_LOCAL_APPDATA` directory. (Note: I'm interested in
|
||||
examples of what some windows apps use for a logs dir.)
|
||||
|
||||
OPINION: This function appends "Logs" to the `CSIDL_LOCAL_APPDATA`
|
||||
value for Windows and appends "log" to the user cache dir for Unix.
|
||||
This can be disabled with the `opinion=False` option.
|
||||
"""
|
||||
if system == "darwin":
|
||||
path = os.path.join(
|
||||
os.path.expanduser('~/Library/Logs'),
|
||||
appname)
|
||||
elif system == "win32":
|
||||
path = user_data_dir(appname, appauthor, version)
|
||||
version = False
|
||||
if opinion:
|
||||
path = os.path.join(path, "Logs")
|
||||
else:
|
||||
path = user_cache_dir(appname, appauthor, version)
|
||||
version = False
|
||||
if opinion:
|
||||
path = os.path.join(path, "log")
|
||||
if appname and version:
|
||||
path = os.path.join(path, version)
|
||||
return path
|
||||
|
||||
|
||||
class AppDirs(object):
|
||||
"""Convenience wrapper for getting application dirs."""
|
||||
def __init__(self, appname=None, appauthor=None, version=None,
|
||||
roaming=False, multipath=False):
|
||||
self.appname = appname
|
||||
self.appauthor = appauthor
|
||||
self.version = version
|
||||
self.roaming = roaming
|
||||
self.multipath = multipath
|
||||
|
||||
@property
|
||||
def user_data_dir(self):
|
||||
return user_data_dir(self.appname, self.appauthor,
|
||||
version=self.version, roaming=self.roaming)
|
||||
|
||||
@property
|
||||
def site_data_dir(self):
|
||||
return site_data_dir(self.appname, self.appauthor,
|
||||
version=self.version, multipath=self.multipath)
|
||||
|
||||
@property
|
||||
def user_config_dir(self):
|
||||
return user_config_dir(self.appname, self.appauthor,
|
||||
version=self.version, roaming=self.roaming)
|
||||
|
||||
@property
|
||||
def site_config_dir(self):
|
||||
return site_config_dir(self.appname, self.appauthor,
|
||||
version=self.version, multipath=self.multipath)
|
||||
|
||||
@property
|
||||
def user_cache_dir(self):
|
||||
return user_cache_dir(self.appname, self.appauthor,
|
||||
version=self.version)
|
||||
|
||||
@property
|
||||
def user_state_dir(self):
|
||||
return user_state_dir(self.appname, self.appauthor,
|
||||
version=self.version)
|
||||
|
||||
@property
|
||||
def user_log_dir(self):
|
||||
return user_log_dir(self.appname, self.appauthor,
|
||||
version=self.version)
|
||||
|
||||
|
||||
#---- internal support stuff
|
||||
|
||||
def _get_win_folder_from_registry(csidl_name):
|
||||
"""This is a fallback technique at best. I'm not sure if using the
|
||||
registry for this guarantees us the correct answer for all CSIDL_*
|
||||
names.
|
||||
"""
|
||||
if PY3:
|
||||
import winreg as _winreg
|
||||
else:
|
||||
import _winreg
|
||||
|
||||
shell_folder_name = {
|
||||
"CSIDL_APPDATA": "AppData",
|
||||
"CSIDL_COMMON_APPDATA": "Common AppData",
|
||||
"CSIDL_LOCAL_APPDATA": "Local AppData",
|
||||
}[csidl_name]
|
||||
|
||||
key = _winreg.OpenKey(
|
||||
_winreg.HKEY_CURRENT_USER,
|
||||
r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders"
|
||||
)
|
||||
dir, type = _winreg.QueryValueEx(key, shell_folder_name)
|
||||
return dir
|
||||
|
||||
|
||||
def _get_win_folder_with_pywin32(csidl_name):
|
||||
from win32com.shell import shellcon, shell
|
||||
dir = shell.SHGetFolderPath(0, getattr(shellcon, csidl_name), 0, 0)
|
||||
# Try to make this a unicode path because SHGetFolderPath does
|
||||
# not return unicode strings when there is unicode data in the
|
||||
# path.
|
||||
try:
|
||||
dir = unicode(dir)
|
||||
|
||||
# Downgrade to short path name if have highbit chars. See
|
||||
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
|
||||
has_high_char = False
|
||||
for c in dir:
|
||||
if ord(c) > 255:
|
||||
has_high_char = True
|
||||
break
|
||||
if has_high_char:
|
||||
try:
|
||||
import win32api
|
||||
dir = win32api.GetShortPathName(dir)
|
||||
except ImportError:
|
||||
pass
|
||||
except UnicodeError:
|
||||
pass
|
||||
return dir
|
||||
|
||||
|
||||
def _get_win_folder_with_ctypes(csidl_name):
|
||||
import ctypes
|
||||
|
||||
csidl_const = {
|
||||
"CSIDL_APPDATA": 26,
|
||||
"CSIDL_COMMON_APPDATA": 35,
|
||||
"CSIDL_LOCAL_APPDATA": 28,
|
||||
}[csidl_name]
|
||||
|
||||
buf = ctypes.create_unicode_buffer(1024)
|
||||
ctypes.windll.shell32.SHGetFolderPathW(None, csidl_const, None, 0, buf)
|
||||
|
||||
# Downgrade to short path name if have highbit chars. See
|
||||
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
|
||||
has_high_char = False
|
||||
for c in buf:
|
||||
if ord(c) > 255:
|
||||
has_high_char = True
|
||||
break
|
||||
if has_high_char:
|
||||
buf2 = ctypes.create_unicode_buffer(1024)
|
||||
if ctypes.windll.kernel32.GetShortPathNameW(buf.value, buf2, 1024):
|
||||
buf = buf2
|
||||
|
||||
return buf.value
|
||||
|
||||
def _get_win_folder_with_jna(csidl_name):
|
||||
import array
|
||||
from com.sun import jna
|
||||
from com.sun.jna.platform import win32
|
||||
|
||||
buf_size = win32.WinDef.MAX_PATH * 2
|
||||
buf = array.zeros('c', buf_size)
|
||||
shell = win32.Shell32.INSTANCE
|
||||
shell.SHGetFolderPath(None, getattr(win32.ShlObj, csidl_name), None, win32.ShlObj.SHGFP_TYPE_CURRENT, buf)
|
||||
dir = jna.Native.toString(buf.tostring()).rstrip("\0")
|
||||
|
||||
# Downgrade to short path name if have highbit chars. See
|
||||
# <http://bugs.activestate.com/show_bug.cgi?id=85099>.
|
||||
has_high_char = False
|
||||
for c in dir:
|
||||
if ord(c) > 255:
|
||||
has_high_char = True
|
||||
break
|
||||
if has_high_char:
|
||||
buf = array.zeros('c', buf_size)
|
||||
kernel = win32.Kernel32.INSTANCE
|
||||
if kernel.GetShortPathName(dir, buf, buf_size):
|
||||
dir = jna.Native.toString(buf.tostring()).rstrip("\0")
|
||||
|
||||
return dir
|
||||
|
||||
if system == "win32":
|
||||
try:
|
||||
from ctypes import windll
|
||||
_get_win_folder = _get_win_folder_with_ctypes
|
||||
except ImportError:
|
||||
try:
|
||||
import com.sun.jna
|
||||
_get_win_folder = _get_win_folder_with_jna
|
||||
except ImportError:
|
||||
_get_win_folder = _get_win_folder_from_registry
|
||||
|
||||
|
||||
def _win_path_to_bytes(path):
|
||||
"""Encode Windows paths to bytes. Only used on Python 2.
|
||||
|
||||
Motivation is to be consistent with other operating systems where paths
|
||||
are also returned as bytes. This avoids problems mixing bytes and Unicode
|
||||
elsewhere in the codebase. For more details and discussion see
|
||||
<https://github.com/pypa/pip/issues/3463>.
|
||||
|
||||
If encoding using ASCII and MBCS fails, return the original Unicode path.
|
||||
"""
|
||||
for encoding in ('ASCII', 'MBCS'):
|
||||
try:
|
||||
return path.encode(encoding)
|
||||
except (UnicodeEncodeError, LookupError):
|
||||
pass
|
||||
return path
|
||||
|
||||
|
||||
#---- self test code
|
||||
|
||||
if __name__ == "__main__":
|
||||
appname = "MyApp"
|
||||
appauthor = "MyCompany"
|
||||
|
||||
props = ("user_data_dir",
|
||||
"user_config_dir",
|
||||
"user_cache_dir",
|
||||
"user_state_dir",
|
||||
"user_log_dir",
|
||||
"site_data_dir",
|
||||
"site_config_dir")
|
||||
|
||||
print("-- app dirs %s --" % __version__)
|
||||
|
||||
print("-- app dirs (with optional 'version')")
|
||||
dirs = AppDirs(appname, appauthor, version="1.0")
|
||||
for prop in props:
|
||||
print("%s: %s" % (prop, getattr(dirs, prop)))
|
||||
|
||||
print("\n-- app dirs (without optional 'version')")
|
||||
dirs = AppDirs(appname, appauthor)
|
||||
for prop in props:
|
||||
print("%s: %s" % (prop, getattr(dirs, prop)))
|
||||
|
||||
print("\n-- app dirs (without optional 'appauthor')")
|
||||
dirs = AppDirs(appname)
|
||||
for prop in props:
|
||||
print("%s: %s" % (prop, getattr(dirs, prop)))
|
||||
|
||||
print("\n-- app dirs (with disabled 'appauthor')")
|
||||
dirs = AppDirs(appname, appauthor=False)
|
||||
for prop in props:
|
||||
print("%s: %s" % (prop, getattr(dirs, prop)))
|
||||
11
venv/Lib/site-packages/pip/_vendor/cachecontrol/__init__.py
Normal file
11
venv/Lib/site-packages/pip/_vendor/cachecontrol/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""CacheControl import Interface.
|
||||
|
||||
Make it easy to import from cachecontrol without long namespaces.
|
||||
"""
|
||||
__author__ = "Eric Larson"
|
||||
__email__ = "eric@ionrock.org"
|
||||
__version__ = "0.12.6"
|
||||
|
||||
from .wrapper import CacheControl
|
||||
from .adapter import CacheControlAdapter
|
||||
from .controller import CacheController
|
||||
57
venv/Lib/site-packages/pip/_vendor/cachecontrol/_cmd.py
Normal file
57
venv/Lib/site-packages/pip/_vendor/cachecontrol/_cmd.py
Normal file
@@ -0,0 +1,57 @@
|
||||
import logging
|
||||
|
||||
from pip._vendor import requests
|
||||
|
||||
from pip._vendor.cachecontrol.adapter import CacheControlAdapter
|
||||
from pip._vendor.cachecontrol.cache import DictCache
|
||||
from pip._vendor.cachecontrol.controller import logger
|
||||
|
||||
from argparse import ArgumentParser
|
||||
|
||||
|
||||
def setup_logging():
|
||||
logger.setLevel(logging.DEBUG)
|
||||
handler = logging.StreamHandler()
|
||||
logger.addHandler(handler)
|
||||
|
||||
|
||||
def get_session():
|
||||
adapter = CacheControlAdapter(
|
||||
DictCache(), cache_etags=True, serializer=None, heuristic=None
|
||||
)
|
||||
sess = requests.Session()
|
||||
sess.mount("http://", adapter)
|
||||
sess.mount("https://", adapter)
|
||||
|
||||
sess.cache_controller = adapter.controller
|
||||
return sess
|
||||
|
||||
|
||||
def get_args():
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument("url", help="The URL to try and cache")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main(args=None):
|
||||
args = get_args()
|
||||
sess = get_session()
|
||||
|
||||
# Make a request to get a response
|
||||
resp = sess.get(args.url)
|
||||
|
||||
# Turn on logging
|
||||
setup_logging()
|
||||
|
||||
# try setting the cache
|
||||
sess.cache_controller.cache_response(resp.request, resp.raw)
|
||||
|
||||
# Now try to get it
|
||||
if sess.cache_controller.cached_request(resp.request):
|
||||
print("Cached!")
|
||||
else:
|
||||
print("Not cached :(")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
133
venv/Lib/site-packages/pip/_vendor/cachecontrol/adapter.py
Normal file
133
venv/Lib/site-packages/pip/_vendor/cachecontrol/adapter.py
Normal file
@@ -0,0 +1,133 @@
|
||||
import types
|
||||
import functools
|
||||
import zlib
|
||||
|
||||
from pip._vendor.requests.adapters import HTTPAdapter
|
||||
|
||||
from .controller import CacheController
|
||||
from .cache import DictCache
|
||||
from .filewrapper import CallbackFileWrapper
|
||||
|
||||
|
||||
class CacheControlAdapter(HTTPAdapter):
|
||||
invalidating_methods = {"PUT", "DELETE"}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
cache=None,
|
||||
cache_etags=True,
|
||||
controller_class=None,
|
||||
serializer=None,
|
||||
heuristic=None,
|
||||
cacheable_methods=None,
|
||||
*args,
|
||||
**kw
|
||||
):
|
||||
super(CacheControlAdapter, self).__init__(*args, **kw)
|
||||
self.cache = DictCache() if cache is None else cache
|
||||
self.heuristic = heuristic
|
||||
self.cacheable_methods = cacheable_methods or ("GET",)
|
||||
|
||||
controller_factory = controller_class or CacheController
|
||||
self.controller = controller_factory(
|
||||
self.cache, cache_etags=cache_etags, serializer=serializer
|
||||
)
|
||||
|
||||
def send(self, request, cacheable_methods=None, **kw):
|
||||
"""
|
||||
Send a request. Use the request information to see if it
|
||||
exists in the cache and cache the response if we need to and can.
|
||||
"""
|
||||
cacheable = cacheable_methods or self.cacheable_methods
|
||||
if request.method in cacheable:
|
||||
try:
|
||||
cached_response = self.controller.cached_request(request)
|
||||
except zlib.error:
|
||||
cached_response = None
|
||||
if cached_response:
|
||||
return self.build_response(request, cached_response, from_cache=True)
|
||||
|
||||
# check for etags and add headers if appropriate
|
||||
request.headers.update(self.controller.conditional_headers(request))
|
||||
|
||||
resp = super(CacheControlAdapter, self).send(request, **kw)
|
||||
|
||||
return resp
|
||||
|
||||
def build_response(
|
||||
self, request, response, from_cache=False, cacheable_methods=None
|
||||
):
|
||||
"""
|
||||
Build a response by making a request or using the cache.
|
||||
|
||||
This will end up calling send and returning a potentially
|
||||
cached response
|
||||
"""
|
||||
cacheable = cacheable_methods or self.cacheable_methods
|
||||
if not from_cache and request.method in cacheable:
|
||||
# Check for any heuristics that might update headers
|
||||
# before trying to cache.
|
||||
if self.heuristic:
|
||||
response = self.heuristic.apply(response)
|
||||
|
||||
# apply any expiration heuristics
|
||||
if response.status == 304:
|
||||
# We must have sent an ETag request. This could mean
|
||||
# that we've been expired already or that we simply
|
||||
# have an etag. In either case, we want to try and
|
||||
# update the cache if that is the case.
|
||||
cached_response = self.controller.update_cached_response(
|
||||
request, response
|
||||
)
|
||||
|
||||
if cached_response is not response:
|
||||
from_cache = True
|
||||
|
||||
# We are done with the server response, read a
|
||||
# possible response body (compliant servers will
|
||||
# not return one, but we cannot be 100% sure) and
|
||||
# release the connection back to the pool.
|
||||
response.read(decode_content=False)
|
||||
response.release_conn()
|
||||
|
||||
response = cached_response
|
||||
|
||||
# We always cache the 301 responses
|
||||
elif response.status == 301:
|
||||
self.controller.cache_response(request, response)
|
||||
else:
|
||||
# Wrap the response file with a wrapper that will cache the
|
||||
# response when the stream has been consumed.
|
||||
response._fp = CallbackFileWrapper(
|
||||
response._fp,
|
||||
functools.partial(
|
||||
self.controller.cache_response, request, response
|
||||
),
|
||||
)
|
||||
if response.chunked:
|
||||
super_update_chunk_length = response._update_chunk_length
|
||||
|
||||
def _update_chunk_length(self):
|
||||
super_update_chunk_length()
|
||||
if self.chunk_left == 0:
|
||||
self._fp._close()
|
||||
|
||||
response._update_chunk_length = types.MethodType(
|
||||
_update_chunk_length, response
|
||||
)
|
||||
|
||||
resp = super(CacheControlAdapter, self).build_response(request, response)
|
||||
|
||||
# See if we should invalidate the cache.
|
||||
if request.method in self.invalidating_methods and resp.ok:
|
||||
cache_url = self.controller.cache_url(request.url)
|
||||
self.cache.delete(cache_url)
|
||||
|
||||
# Give the request a from_cache attr to let people use it
|
||||
resp.from_cache = from_cache
|
||||
|
||||
return resp
|
||||
|
||||
def close(self):
|
||||
self.cache.close()
|
||||
super(CacheControlAdapter, self).close()
|
||||
39
venv/Lib/site-packages/pip/_vendor/cachecontrol/cache.py
Normal file
39
venv/Lib/site-packages/pip/_vendor/cachecontrol/cache.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""
|
||||
The cache object API for implementing caches. The default is a thread
|
||||
safe in-memory dictionary.
|
||||
"""
|
||||
from threading import Lock
|
||||
|
||||
|
||||
class BaseCache(object):
|
||||
|
||||
def get(self, key):
|
||||
raise NotImplementedError()
|
||||
|
||||
def set(self, key, value):
|
||||
raise NotImplementedError()
|
||||
|
||||
def delete(self, key):
|
||||
raise NotImplementedError()
|
||||
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
|
||||
class DictCache(BaseCache):
|
||||
|
||||
def __init__(self, init_dict=None):
|
||||
self.lock = Lock()
|
||||
self.data = init_dict or {}
|
||||
|
||||
def get(self, key):
|
||||
return self.data.get(key, None)
|
||||
|
||||
def set(self, key, value):
|
||||
with self.lock:
|
||||
self.data.update({key: value})
|
||||
|
||||
def delete(self, key):
|
||||
with self.lock:
|
||||
if key in self.data:
|
||||
self.data.pop(key)
|
||||
@@ -0,0 +1,2 @@
|
||||
from .file_cache import FileCache # noqa
|
||||
from .redis_cache import RedisCache # noqa
|
||||
@@ -0,0 +1,146 @@
|
||||
import hashlib
|
||||
import os
|
||||
from textwrap import dedent
|
||||
|
||||
from ..cache import BaseCache
|
||||
from ..controller import CacheController
|
||||
|
||||
try:
|
||||
FileNotFoundError
|
||||
except NameError:
|
||||
# py2.X
|
||||
FileNotFoundError = (IOError, OSError)
|
||||
|
||||
|
||||
def _secure_open_write(filename, fmode):
|
||||
# We only want to write to this file, so open it in write only mode
|
||||
flags = os.O_WRONLY
|
||||
|
||||
# os.O_CREAT | os.O_EXCL will fail if the file already exists, so we only
|
||||
# will open *new* files.
|
||||
# We specify this because we want to ensure that the mode we pass is the
|
||||
# mode of the file.
|
||||
flags |= os.O_CREAT | os.O_EXCL
|
||||
|
||||
# Do not follow symlinks to prevent someone from making a symlink that
|
||||
# we follow and insecurely open a cache file.
|
||||
if hasattr(os, "O_NOFOLLOW"):
|
||||
flags |= os.O_NOFOLLOW
|
||||
|
||||
# On Windows we'll mark this file as binary
|
||||
if hasattr(os, "O_BINARY"):
|
||||
flags |= os.O_BINARY
|
||||
|
||||
# Before we open our file, we want to delete any existing file that is
|
||||
# there
|
||||
try:
|
||||
os.remove(filename)
|
||||
except (IOError, OSError):
|
||||
# The file must not exist already, so we can just skip ahead to opening
|
||||
pass
|
||||
|
||||
# Open our file, the use of os.O_CREAT | os.O_EXCL will ensure that if a
|
||||
# race condition happens between the os.remove and this line, that an
|
||||
# error will be raised. Because we utilize a lockfile this should only
|
||||
# happen if someone is attempting to attack us.
|
||||
fd = os.open(filename, flags, fmode)
|
||||
try:
|
||||
return os.fdopen(fd, "wb")
|
||||
|
||||
except:
|
||||
# An error occurred wrapping our FD in a file object
|
||||
os.close(fd)
|
||||
raise
|
||||
|
||||
|
||||
class FileCache(BaseCache):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
directory,
|
||||
forever=False,
|
||||
filemode=0o0600,
|
||||
dirmode=0o0700,
|
||||
use_dir_lock=None,
|
||||
lock_class=None,
|
||||
):
|
||||
|
||||
if use_dir_lock is not None and lock_class is not None:
|
||||
raise ValueError("Cannot use use_dir_lock and lock_class together")
|
||||
|
||||
try:
|
||||
from lockfile import LockFile
|
||||
from lockfile.mkdirlockfile import MkdirLockFile
|
||||
except ImportError:
|
||||
notice = dedent(
|
||||
"""
|
||||
NOTE: In order to use the FileCache you must have
|
||||
lockfile installed. You can install it via pip:
|
||||
pip install lockfile
|
||||
"""
|
||||
)
|
||||
raise ImportError(notice)
|
||||
|
||||
else:
|
||||
if use_dir_lock:
|
||||
lock_class = MkdirLockFile
|
||||
|
||||
elif lock_class is None:
|
||||
lock_class = LockFile
|
||||
|
||||
self.directory = directory
|
||||
self.forever = forever
|
||||
self.filemode = filemode
|
||||
self.dirmode = dirmode
|
||||
self.lock_class = lock_class
|
||||
|
||||
@staticmethod
|
||||
def encode(x):
|
||||
return hashlib.sha224(x.encode()).hexdigest()
|
||||
|
||||
def _fn(self, name):
|
||||
# NOTE: This method should not change as some may depend on it.
|
||||
# See: https://github.com/ionrock/cachecontrol/issues/63
|
||||
hashed = self.encode(name)
|
||||
parts = list(hashed[:5]) + [hashed]
|
||||
return os.path.join(self.directory, *parts)
|
||||
|
||||
def get(self, key):
|
||||
name = self._fn(key)
|
||||
try:
|
||||
with open(name, "rb") as fh:
|
||||
return fh.read()
|
||||
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
|
||||
def set(self, key, value):
|
||||
name = self._fn(key)
|
||||
|
||||
# Make sure the directory exists
|
||||
try:
|
||||
os.makedirs(os.path.dirname(name), self.dirmode)
|
||||
except (IOError, OSError):
|
||||
pass
|
||||
|
||||
with self.lock_class(name) as lock:
|
||||
# Write our actual file
|
||||
with _secure_open_write(lock.path, self.filemode) as fh:
|
||||
fh.write(value)
|
||||
|
||||
def delete(self, key):
|
||||
name = self._fn(key)
|
||||
if not self.forever:
|
||||
try:
|
||||
os.remove(name)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
|
||||
def url_to_file_path(url, filecache):
|
||||
"""Return the file cache path based on the URL.
|
||||
|
||||
This does not ensure the file exists!
|
||||
"""
|
||||
key = CacheController.cache_url(url)
|
||||
return filecache._fn(key)
|
||||
@@ -0,0 +1,33 @@
|
||||
from __future__ import division
|
||||
|
||||
from datetime import datetime
|
||||
from pip._vendor.cachecontrol.cache import BaseCache
|
||||
|
||||
|
||||
class RedisCache(BaseCache):
|
||||
|
||||
def __init__(self, conn):
|
||||
self.conn = conn
|
||||
|
||||
def get(self, key):
|
||||
return self.conn.get(key)
|
||||
|
||||
def set(self, key, value, expires=None):
|
||||
if not expires:
|
||||
self.conn.set(key, value)
|
||||
else:
|
||||
expires = expires - datetime.utcnow()
|
||||
self.conn.setex(key, int(expires.total_seconds()), value)
|
||||
|
||||
def delete(self, key):
|
||||
self.conn.delete(key)
|
||||
|
||||
def clear(self):
|
||||
"""Helper for clearing all the keys in a database. Use with
|
||||
caution!"""
|
||||
for key in self.conn.keys():
|
||||
self.conn.delete(key)
|
||||
|
||||
def close(self):
|
||||
"""Redis uses connection pooling, no need to close the connection."""
|
||||
pass
|
||||
29
venv/Lib/site-packages/pip/_vendor/cachecontrol/compat.py
Normal file
29
venv/Lib/site-packages/pip/_vendor/cachecontrol/compat.py
Normal file
@@ -0,0 +1,29 @@
|
||||
try:
|
||||
from urllib.parse import urljoin
|
||||
except ImportError:
|
||||
from urlparse import urljoin
|
||||
|
||||
|
||||
try:
|
||||
import cPickle as pickle
|
||||
except ImportError:
|
||||
import pickle
|
||||
|
||||
|
||||
# Handle the case where the requests module has been patched to not have
|
||||
# urllib3 bundled as part of its source.
|
||||
try:
|
||||
from pip._vendor.requests.packages.urllib3.response import HTTPResponse
|
||||
except ImportError:
|
||||
from pip._vendor.urllib3.response import HTTPResponse
|
||||
|
||||
try:
|
||||
from pip._vendor.requests.packages.urllib3.util import is_fp_closed
|
||||
except ImportError:
|
||||
from pip._vendor.urllib3.util import is_fp_closed
|
||||
|
||||
# Replicate some six behaviour
|
||||
try:
|
||||
text_type = unicode
|
||||
except NameError:
|
||||
text_type = str
|
||||
376
venv/Lib/site-packages/pip/_vendor/cachecontrol/controller.py
Normal file
376
venv/Lib/site-packages/pip/_vendor/cachecontrol/controller.py
Normal file
@@ -0,0 +1,376 @@
|
||||
"""
|
||||
The httplib2 algorithms ported for use with requests.
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
import calendar
|
||||
import time
|
||||
from email.utils import parsedate_tz
|
||||
|
||||
from pip._vendor.requests.structures import CaseInsensitiveDict
|
||||
|
||||
from .cache import DictCache
|
||||
from .serialize import Serializer
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
|
||||
|
||||
|
||||
def parse_uri(uri):
|
||||
"""Parses a URI using the regex given in Appendix B of RFC 3986.
|
||||
|
||||
(scheme, authority, path, query, fragment) = parse_uri(uri)
|
||||
"""
|
||||
groups = URI.match(uri).groups()
|
||||
return (groups[1], groups[3], groups[4], groups[6], groups[8])
|
||||
|
||||
|
||||
class CacheController(object):
|
||||
"""An interface to see if request should cached or not.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, cache=None, cache_etags=True, serializer=None, status_codes=None
|
||||
):
|
||||
self.cache = DictCache() if cache is None else cache
|
||||
self.cache_etags = cache_etags
|
||||
self.serializer = serializer or Serializer()
|
||||
self.cacheable_status_codes = status_codes or (200, 203, 300, 301)
|
||||
|
||||
@classmethod
|
||||
def _urlnorm(cls, uri):
|
||||
"""Normalize the URL to create a safe key for the cache"""
|
||||
(scheme, authority, path, query, fragment) = parse_uri(uri)
|
||||
if not scheme or not authority:
|
||||
raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
|
||||
|
||||
scheme = scheme.lower()
|
||||
authority = authority.lower()
|
||||
|
||||
if not path:
|
||||
path = "/"
|
||||
|
||||
# Could do syntax based normalization of the URI before
|
||||
# computing the digest. See Section 6.2.2 of Std 66.
|
||||
request_uri = query and "?".join([path, query]) or path
|
||||
defrag_uri = scheme + "://" + authority + request_uri
|
||||
|
||||
return defrag_uri
|
||||
|
||||
@classmethod
|
||||
def cache_url(cls, uri):
|
||||
return cls._urlnorm(uri)
|
||||
|
||||
def parse_cache_control(self, headers):
|
||||
known_directives = {
|
||||
# https://tools.ietf.org/html/rfc7234#section-5.2
|
||||
"max-age": (int, True),
|
||||
"max-stale": (int, False),
|
||||
"min-fresh": (int, True),
|
||||
"no-cache": (None, False),
|
||||
"no-store": (None, False),
|
||||
"no-transform": (None, False),
|
||||
"only-if-cached": (None, False),
|
||||
"must-revalidate": (None, False),
|
||||
"public": (None, False),
|
||||
"private": (None, False),
|
||||
"proxy-revalidate": (None, False),
|
||||
"s-maxage": (int, True),
|
||||
}
|
||||
|
||||
cc_headers = headers.get("cache-control", headers.get("Cache-Control", ""))
|
||||
|
||||
retval = {}
|
||||
|
||||
for cc_directive in cc_headers.split(","):
|
||||
if not cc_directive.strip():
|
||||
continue
|
||||
|
||||
parts = cc_directive.split("=", 1)
|
||||
directive = parts[0].strip()
|
||||
|
||||
try:
|
||||
typ, required = known_directives[directive]
|
||||
except KeyError:
|
||||
logger.debug("Ignoring unknown cache-control directive: %s", directive)
|
||||
continue
|
||||
|
||||
if not typ or not required:
|
||||
retval[directive] = None
|
||||
if typ:
|
||||
try:
|
||||
retval[directive] = typ(parts[1].strip())
|
||||
except IndexError:
|
||||
if required:
|
||||
logger.debug(
|
||||
"Missing value for cache-control " "directive: %s",
|
||||
directive,
|
||||
)
|
||||
except ValueError:
|
||||
logger.debug(
|
||||
"Invalid value for cache-control directive " "%s, must be %s",
|
||||
directive,
|
||||
typ.__name__,
|
||||
)
|
||||
|
||||
return retval
|
||||
|
||||
def cached_request(self, request):
|
||||
"""
|
||||
Return a cached response if it exists in the cache, otherwise
|
||||
return False.
|
||||
"""
|
||||
cache_url = self.cache_url(request.url)
|
||||
logger.debug('Looking up "%s" in the cache', cache_url)
|
||||
cc = self.parse_cache_control(request.headers)
|
||||
|
||||
# Bail out if the request insists on fresh data
|
||||
if "no-cache" in cc:
|
||||
logger.debug('Request header has "no-cache", cache bypassed')
|
||||
return False
|
||||
|
||||
if "max-age" in cc and cc["max-age"] == 0:
|
||||
logger.debug('Request header has "max_age" as 0, cache bypassed')
|
||||
return False
|
||||
|
||||
# Request allows serving from the cache, let's see if we find something
|
||||
cache_data = self.cache.get(cache_url)
|
||||
if cache_data is None:
|
||||
logger.debug("No cache entry available")
|
||||
return False
|
||||
|
||||
# Check whether it can be deserialized
|
||||
resp = self.serializer.loads(request, cache_data)
|
||||
if not resp:
|
||||
logger.warning("Cache entry deserialization failed, entry ignored")
|
||||
return False
|
||||
|
||||
# If we have a cached 301, return it immediately. We don't
|
||||
# need to test our response for other headers b/c it is
|
||||
# intrinsically "cacheable" as it is Permanent.
|
||||
# See:
|
||||
# https://tools.ietf.org/html/rfc7231#section-6.4.2
|
||||
#
|
||||
# Client can try to refresh the value by repeating the request
|
||||
# with cache busting headers as usual (ie no-cache).
|
||||
if resp.status == 301:
|
||||
msg = (
|
||||
'Returning cached "301 Moved Permanently" response '
|
||||
"(ignoring date and etag information)"
|
||||
)
|
||||
logger.debug(msg)
|
||||
return resp
|
||||
|
||||
headers = CaseInsensitiveDict(resp.headers)
|
||||
if not headers or "date" not in headers:
|
||||
if "etag" not in headers:
|
||||
# Without date or etag, the cached response can never be used
|
||||
# and should be deleted.
|
||||
logger.debug("Purging cached response: no date or etag")
|
||||
self.cache.delete(cache_url)
|
||||
logger.debug("Ignoring cached response: no date")
|
||||
return False
|
||||
|
||||
now = time.time()
|
||||
date = calendar.timegm(parsedate_tz(headers["date"]))
|
||||
current_age = max(0, now - date)
|
||||
logger.debug("Current age based on date: %i", current_age)
|
||||
|
||||
# TODO: There is an assumption that the result will be a
|
||||
# urllib3 response object. This may not be best since we
|
||||
# could probably avoid instantiating or constructing the
|
||||
# response until we know we need it.
|
||||
resp_cc = self.parse_cache_control(headers)
|
||||
|
||||
# determine freshness
|
||||
freshness_lifetime = 0
|
||||
|
||||
# Check the max-age pragma in the cache control header
|
||||
if "max-age" in resp_cc:
|
||||
freshness_lifetime = resp_cc["max-age"]
|
||||
logger.debug("Freshness lifetime from max-age: %i", freshness_lifetime)
|
||||
|
||||
# If there isn't a max-age, check for an expires header
|
||||
elif "expires" in headers:
|
||||
expires = parsedate_tz(headers["expires"])
|
||||
if expires is not None:
|
||||
expire_time = calendar.timegm(expires) - date
|
||||
freshness_lifetime = max(0, expire_time)
|
||||
logger.debug("Freshness lifetime from expires: %i", freshness_lifetime)
|
||||
|
||||
# Determine if we are setting freshness limit in the
|
||||
# request. Note, this overrides what was in the response.
|
||||
if "max-age" in cc:
|
||||
freshness_lifetime = cc["max-age"]
|
||||
logger.debug(
|
||||
"Freshness lifetime from request max-age: %i", freshness_lifetime
|
||||
)
|
||||
|
||||
if "min-fresh" in cc:
|
||||
min_fresh = cc["min-fresh"]
|
||||
# adjust our current age by our min fresh
|
||||
current_age += min_fresh
|
||||
logger.debug("Adjusted current age from min-fresh: %i", current_age)
|
||||
|
||||
# Return entry if it is fresh enough
|
||||
if freshness_lifetime > current_age:
|
||||
logger.debug('The response is "fresh", returning cached response')
|
||||
logger.debug("%i > %i", freshness_lifetime, current_age)
|
||||
return resp
|
||||
|
||||
# we're not fresh. If we don't have an Etag, clear it out
|
||||
if "etag" not in headers:
|
||||
logger.debug('The cached response is "stale" with no etag, purging')
|
||||
self.cache.delete(cache_url)
|
||||
|
||||
# return the original handler
|
||||
return False
|
||||
|
||||
def conditional_headers(self, request):
|
||||
cache_url = self.cache_url(request.url)
|
||||
resp = self.serializer.loads(request, self.cache.get(cache_url))
|
||||
new_headers = {}
|
||||
|
||||
if resp:
|
||||
headers = CaseInsensitiveDict(resp.headers)
|
||||
|
||||
if "etag" in headers:
|
||||
new_headers["If-None-Match"] = headers["ETag"]
|
||||
|
||||
if "last-modified" in headers:
|
||||
new_headers["If-Modified-Since"] = headers["Last-Modified"]
|
||||
|
||||
return new_headers
|
||||
|
||||
def cache_response(self, request, response, body=None, status_codes=None):
|
||||
"""
|
||||
Algorithm for caching requests.
|
||||
|
||||
This assumes a requests Response object.
|
||||
"""
|
||||
# From httplib2: Don't cache 206's since we aren't going to
|
||||
# handle byte range requests
|
||||
cacheable_status_codes = status_codes or self.cacheable_status_codes
|
||||
if response.status not in cacheable_status_codes:
|
||||
logger.debug(
|
||||
"Status code %s not in %s", response.status, cacheable_status_codes
|
||||
)
|
||||
return
|
||||
|
||||
response_headers = CaseInsensitiveDict(response.headers)
|
||||
|
||||
# If we've been given a body, our response has a Content-Length, that
|
||||
# Content-Length is valid then we can check to see if the body we've
|
||||
# been given matches the expected size, and if it doesn't we'll just
|
||||
# skip trying to cache it.
|
||||
if (
|
||||
body is not None
|
||||
and "content-length" in response_headers
|
||||
and response_headers["content-length"].isdigit()
|
||||
and int(response_headers["content-length"]) != len(body)
|
||||
):
|
||||
return
|
||||
|
||||
cc_req = self.parse_cache_control(request.headers)
|
||||
cc = self.parse_cache_control(response_headers)
|
||||
|
||||
cache_url = self.cache_url(request.url)
|
||||
logger.debug('Updating cache with response from "%s"', cache_url)
|
||||
|
||||
# Delete it from the cache if we happen to have it stored there
|
||||
no_store = False
|
||||
if "no-store" in cc:
|
||||
no_store = True
|
||||
logger.debug('Response header has "no-store"')
|
||||
if "no-store" in cc_req:
|
||||
no_store = True
|
||||
logger.debug('Request header has "no-store"')
|
||||
if no_store and self.cache.get(cache_url):
|
||||
logger.debug('Purging existing cache entry to honor "no-store"')
|
||||
self.cache.delete(cache_url)
|
||||
if no_store:
|
||||
return
|
||||
|
||||
# https://tools.ietf.org/html/rfc7234#section-4.1:
|
||||
# A Vary header field-value of "*" always fails to match.
|
||||
# Storing such a response leads to a deserialization warning
|
||||
# during cache lookup and is not allowed to ever be served,
|
||||
# so storing it can be avoided.
|
||||
if "*" in response_headers.get("vary", ""):
|
||||
logger.debug('Response header has "Vary: *"')
|
||||
return
|
||||
|
||||
# If we've been given an etag, then keep the response
|
||||
if self.cache_etags and "etag" in response_headers:
|
||||
logger.debug("Caching due to etag")
|
||||
self.cache.set(
|
||||
cache_url, self.serializer.dumps(request, response, body=body)
|
||||
)
|
||||
|
||||
# Add to the cache any 301s. We do this before looking that
|
||||
# the Date headers.
|
||||
elif response.status == 301:
|
||||
logger.debug("Caching permanant redirect")
|
||||
self.cache.set(cache_url, self.serializer.dumps(request, response))
|
||||
|
||||
# Add to the cache if the response headers demand it. If there
|
||||
# is no date header then we can't do anything about expiring
|
||||
# the cache.
|
||||
elif "date" in response_headers:
|
||||
# cache when there is a max-age > 0
|
||||
if "max-age" in cc and cc["max-age"] > 0:
|
||||
logger.debug("Caching b/c date exists and max-age > 0")
|
||||
self.cache.set(
|
||||
cache_url, self.serializer.dumps(request, response, body=body)
|
||||
)
|
||||
|
||||
# If the request can expire, it means we should cache it
|
||||
# in the meantime.
|
||||
elif "expires" in response_headers:
|
||||
if response_headers["expires"]:
|
||||
logger.debug("Caching b/c of expires header")
|
||||
self.cache.set(
|
||||
cache_url, self.serializer.dumps(request, response, body=body)
|
||||
)
|
||||
|
||||
def update_cached_response(self, request, response):
|
||||
"""On a 304 we will get a new set of headers that we want to
|
||||
update our cached value with, assuming we have one.
|
||||
|
||||
This should only ever be called when we've sent an ETag and
|
||||
gotten a 304 as the response.
|
||||
"""
|
||||
cache_url = self.cache_url(request.url)
|
||||
|
||||
cached_response = self.serializer.loads(request, self.cache.get(cache_url))
|
||||
|
||||
if not cached_response:
|
||||
# we didn't have a cached response
|
||||
return response
|
||||
|
||||
# Lets update our headers with the headers from the new request:
|
||||
# http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
|
||||
#
|
||||
# The server isn't supposed to send headers that would make
|
||||
# the cached body invalid. But... just in case, we'll be sure
|
||||
# to strip out ones we know that might be problmatic due to
|
||||
# typical assumptions.
|
||||
excluded_headers = ["content-length"]
|
||||
|
||||
cached_response.headers.update(
|
||||
dict(
|
||||
(k, v)
|
||||
for k, v in response.headers.items()
|
||||
if k.lower() not in excluded_headers
|
||||
)
|
||||
)
|
||||
|
||||
# we want a 200 b/c we have content via the cache
|
||||
cached_response.status = 200
|
||||
|
||||
# update our cache
|
||||
self.cache.set(cache_url, self.serializer.dumps(request, cached_response))
|
||||
|
||||
return cached_response
|
||||
@@ -0,0 +1,80 @@
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
class CallbackFileWrapper(object):
|
||||
"""
|
||||
Small wrapper around a fp object which will tee everything read into a
|
||||
buffer, and when that file is closed it will execute a callback with the
|
||||
contents of that buffer.
|
||||
|
||||
All attributes are proxied to the underlying file object.
|
||||
|
||||
This class uses members with a double underscore (__) leading prefix so as
|
||||
not to accidentally shadow an attribute.
|
||||
"""
|
||||
|
||||
def __init__(self, fp, callback):
|
||||
self.__buf = BytesIO()
|
||||
self.__fp = fp
|
||||
self.__callback = callback
|
||||
|
||||
def __getattr__(self, name):
|
||||
# The vaguaries of garbage collection means that self.__fp is
|
||||
# not always set. By using __getattribute__ and the private
|
||||
# name[0] allows looking up the attribute value and raising an
|
||||
# AttributeError when it doesn't exist. This stop thigns from
|
||||
# infinitely recursing calls to getattr in the case where
|
||||
# self.__fp hasn't been set.
|
||||
#
|
||||
# [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
|
||||
fp = self.__getattribute__("_CallbackFileWrapper__fp")
|
||||
return getattr(fp, name)
|
||||
|
||||
def __is_fp_closed(self):
|
||||
try:
|
||||
return self.__fp.fp is None
|
||||
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
try:
|
||||
return self.__fp.closed
|
||||
|
||||
except AttributeError:
|
||||
pass
|
||||
|
||||
# We just don't cache it then.
|
||||
# TODO: Add some logging here...
|
||||
return False
|
||||
|
||||
def _close(self):
|
||||
if self.__callback:
|
||||
self.__callback(self.__buf.getvalue())
|
||||
|
||||
# We assign this to None here, because otherwise we can get into
|
||||
# really tricky problems where the CPython interpreter dead locks
|
||||
# because the callback is holding a reference to something which
|
||||
# has a __del__ method. Setting this to None breaks the cycle
|
||||
# and allows the garbage collector to do it's thing normally.
|
||||
self.__callback = None
|
||||
|
||||
def read(self, amt=None):
|
||||
data = self.__fp.read(amt)
|
||||
self.__buf.write(data)
|
||||
if self.__is_fp_closed():
|
||||
self._close()
|
||||
|
||||
return data
|
||||
|
||||
def _safe_read(self, amt):
|
||||
data = self.__fp._safe_read(amt)
|
||||
if amt == 2 and data == b"\r\n":
|
||||
# urllib executes this read to toss the CRLF at the end
|
||||
# of the chunk.
|
||||
return data
|
||||
|
||||
self.__buf.write(data)
|
||||
if self.__is_fp_closed():
|
||||
self._close()
|
||||
|
||||
return data
|
||||
135
venv/Lib/site-packages/pip/_vendor/cachecontrol/heuristics.py
Normal file
135
venv/Lib/site-packages/pip/_vendor/cachecontrol/heuristics.py
Normal file
@@ -0,0 +1,135 @@
|
||||
import calendar
|
||||
import time
|
||||
|
||||
from email.utils import formatdate, parsedate, parsedate_tz
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
|
||||
|
||||
|
||||
def expire_after(delta, date=None):
|
||||
date = date or datetime.utcnow()
|
||||
return date + delta
|
||||
|
||||
|
||||
def datetime_to_header(dt):
|
||||
return formatdate(calendar.timegm(dt.timetuple()))
|
||||
|
||||
|
||||
class BaseHeuristic(object):
|
||||
|
||||
def warning(self, response):
|
||||
"""
|
||||
Return a valid 1xx warning header value describing the cache
|
||||
adjustments.
|
||||
|
||||
The response is provided too allow warnings like 113
|
||||
http://tools.ietf.org/html/rfc7234#section-5.5.4 where we need
|
||||
to explicitly say response is over 24 hours old.
|
||||
"""
|
||||
return '110 - "Response is Stale"'
|
||||
|
||||
def update_headers(self, response):
|
||||
"""Update the response headers with any new headers.
|
||||
|
||||
NOTE: This SHOULD always include some Warning header to
|
||||
signify that the response was cached by the client, not
|
||||
by way of the provided headers.
|
||||
"""
|
||||
return {}
|
||||
|
||||
def apply(self, response):
|
||||
updated_headers = self.update_headers(response)
|
||||
|
||||
if updated_headers:
|
||||
response.headers.update(updated_headers)
|
||||
warning_header_value = self.warning(response)
|
||||
if warning_header_value is not None:
|
||||
response.headers.update({"Warning": warning_header_value})
|
||||
|
||||
return response
|
||||
|
||||
|
||||
class OneDayCache(BaseHeuristic):
|
||||
"""
|
||||
Cache the response by providing an expires 1 day in the
|
||||
future.
|
||||
"""
|
||||
|
||||
def update_headers(self, response):
|
||||
headers = {}
|
||||
|
||||
if "expires" not in response.headers:
|
||||
date = parsedate(response.headers["date"])
|
||||
expires = expire_after(timedelta(days=1), date=datetime(*date[:6]))
|
||||
headers["expires"] = datetime_to_header(expires)
|
||||
headers["cache-control"] = "public"
|
||||
return headers
|
||||
|
||||
|
||||
class ExpiresAfter(BaseHeuristic):
|
||||
"""
|
||||
Cache **all** requests for a defined time period.
|
||||
"""
|
||||
|
||||
def __init__(self, **kw):
|
||||
self.delta = timedelta(**kw)
|
||||
|
||||
def update_headers(self, response):
|
||||
expires = expire_after(self.delta)
|
||||
return {"expires": datetime_to_header(expires), "cache-control": "public"}
|
||||
|
||||
def warning(self, response):
|
||||
tmpl = "110 - Automatically cached for %s. Response might be stale"
|
||||
return tmpl % self.delta
|
||||
|
||||
|
||||
class LastModified(BaseHeuristic):
|
||||
"""
|
||||
If there is no Expires header already, fall back on Last-Modified
|
||||
using the heuristic from
|
||||
http://tools.ietf.org/html/rfc7234#section-4.2.2
|
||||
to calculate a reasonable value.
|
||||
|
||||
Firefox also does something like this per
|
||||
https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ
|
||||
http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
|
||||
Unlike mozilla we limit this to 24-hr.
|
||||
"""
|
||||
cacheable_by_default_statuses = {
|
||||
200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501
|
||||
}
|
||||
|
||||
def update_headers(self, resp):
|
||||
headers = resp.headers
|
||||
|
||||
if "expires" in headers:
|
||||
return {}
|
||||
|
||||
if "cache-control" in headers and headers["cache-control"] != "public":
|
||||
return {}
|
||||
|
||||
if resp.status not in self.cacheable_by_default_statuses:
|
||||
return {}
|
||||
|
||||
if "date" not in headers or "last-modified" not in headers:
|
||||
return {}
|
||||
|
||||
date = calendar.timegm(parsedate_tz(headers["date"]))
|
||||
last_modified = parsedate(headers["last-modified"])
|
||||
if date is None or last_modified is None:
|
||||
return {}
|
||||
|
||||
now = time.time()
|
||||
current_age = max(0, now - date)
|
||||
delta = date - calendar.timegm(last_modified)
|
||||
freshness_lifetime = max(0, min(delta / 10, 24 * 3600))
|
||||
if freshness_lifetime <= current_age:
|
||||
return {}
|
||||
|
||||
expires = date + freshness_lifetime
|
||||
return {"expires": time.strftime(TIME_FMT, time.gmtime(expires))}
|
||||
|
||||
def warning(self, resp):
|
||||
return None
|
||||
188
venv/Lib/site-packages/pip/_vendor/cachecontrol/serialize.py
Normal file
188
venv/Lib/site-packages/pip/_vendor/cachecontrol/serialize.py
Normal file
@@ -0,0 +1,188 @@
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import zlib
|
||||
|
||||
from pip._vendor import msgpack
|
||||
from pip._vendor.requests.structures import CaseInsensitiveDict
|
||||
|
||||
from .compat import HTTPResponse, pickle, text_type
|
||||
|
||||
|
||||
def _b64_decode_bytes(b):
|
||||
return base64.b64decode(b.encode("ascii"))
|
||||
|
||||
|
||||
def _b64_decode_str(s):
|
||||
return _b64_decode_bytes(s).decode("utf8")
|
||||
|
||||
|
||||
class Serializer(object):
|
||||
|
||||
def dumps(self, request, response, body=None):
|
||||
response_headers = CaseInsensitiveDict(response.headers)
|
||||
|
||||
if body is None:
|
||||
body = response.read(decode_content=False)
|
||||
|
||||
# NOTE: 99% sure this is dead code. I'm only leaving it
|
||||
# here b/c I don't have a test yet to prove
|
||||
# it. Basically, before using
|
||||
# `cachecontrol.filewrapper.CallbackFileWrapper`,
|
||||
# this made an effort to reset the file handle. The
|
||||
# `CallbackFileWrapper` short circuits this code by
|
||||
# setting the body as the content is consumed, the
|
||||
# result being a `body` argument is *always* passed
|
||||
# into cache_response, and in turn,
|
||||
# `Serializer.dump`.
|
||||
response._fp = io.BytesIO(body)
|
||||
|
||||
# NOTE: This is all a bit weird, but it's really important that on
|
||||
# Python 2.x these objects are unicode and not str, even when
|
||||
# they contain only ascii. The problem here is that msgpack
|
||||
# understands the difference between unicode and bytes and we
|
||||
# have it set to differentiate between them, however Python 2
|
||||
# doesn't know the difference. Forcing these to unicode will be
|
||||
# enough to have msgpack know the difference.
|
||||
data = {
|
||||
u"response": {
|
||||
u"body": body,
|
||||
u"headers": dict(
|
||||
(text_type(k), text_type(v)) for k, v in response.headers.items()
|
||||
),
|
||||
u"status": response.status,
|
||||
u"version": response.version,
|
||||
u"reason": text_type(response.reason),
|
||||
u"strict": response.strict,
|
||||
u"decode_content": response.decode_content,
|
||||
}
|
||||
}
|
||||
|
||||
# Construct our vary headers
|
||||
data[u"vary"] = {}
|
||||
if u"vary" in response_headers:
|
||||
varied_headers = response_headers[u"vary"].split(",")
|
||||
for header in varied_headers:
|
||||
header = text_type(header).strip()
|
||||
header_value = request.headers.get(header, None)
|
||||
if header_value is not None:
|
||||
header_value = text_type(header_value)
|
||||
data[u"vary"][header] = header_value
|
||||
|
||||
return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)])
|
||||
|
||||
def loads(self, request, data):
|
||||
# Short circuit if we've been given an empty set of data
|
||||
if not data:
|
||||
return
|
||||
|
||||
# Determine what version of the serializer the data was serialized
|
||||
# with
|
||||
try:
|
||||
ver, data = data.split(b",", 1)
|
||||
except ValueError:
|
||||
ver = b"cc=0"
|
||||
|
||||
# Make sure that our "ver" is actually a version and isn't a false
|
||||
# positive from a , being in the data stream.
|
||||
if ver[:3] != b"cc=":
|
||||
data = ver + data
|
||||
ver = b"cc=0"
|
||||
|
||||
# Get the version number out of the cc=N
|
||||
ver = ver.split(b"=", 1)[-1].decode("ascii")
|
||||
|
||||
# Dispatch to the actual load method for the given version
|
||||
try:
|
||||
return getattr(self, "_loads_v{}".format(ver))(request, data)
|
||||
|
||||
except AttributeError:
|
||||
# This is a version we don't have a loads function for, so we'll
|
||||
# just treat it as a miss and return None
|
||||
return
|
||||
|
||||
def prepare_response(self, request, cached):
|
||||
"""Verify our vary headers match and construct a real urllib3
|
||||
HTTPResponse object.
|
||||
"""
|
||||
# Special case the '*' Vary value as it means we cannot actually
|
||||
# determine if the cached response is suitable for this request.
|
||||
# This case is also handled in the controller code when creating
|
||||
# a cache entry, but is left here for backwards compatibility.
|
||||
if "*" in cached.get("vary", {}):
|
||||
return
|
||||
|
||||
# Ensure that the Vary headers for the cached response match our
|
||||
# request
|
||||
for header, value in cached.get("vary", {}).items():
|
||||
if request.headers.get(header, None) != value:
|
||||
return
|
||||
|
||||
body_raw = cached["response"].pop("body")
|
||||
|
||||
headers = CaseInsensitiveDict(data=cached["response"]["headers"])
|
||||
if headers.get("transfer-encoding", "") == "chunked":
|
||||
headers.pop("transfer-encoding")
|
||||
|
||||
cached["response"]["headers"] = headers
|
||||
|
||||
try:
|
||||
body = io.BytesIO(body_raw)
|
||||
except TypeError:
|
||||
# This can happen if cachecontrol serialized to v1 format (pickle)
|
||||
# using Python 2. A Python 2 str(byte string) will be unpickled as
|
||||
# a Python 3 str (unicode string), which will cause the above to
|
||||
# fail with:
|
||||
#
|
||||
# TypeError: 'str' does not support the buffer interface
|
||||
body = io.BytesIO(body_raw.encode("utf8"))
|
||||
|
||||
return HTTPResponse(body=body, preload_content=False, **cached["response"])
|
||||
|
||||
def _loads_v0(self, request, data):
|
||||
# The original legacy cache data. This doesn't contain enough
|
||||
# information to construct everything we need, so we'll treat this as
|
||||
# a miss.
|
||||
return
|
||||
|
||||
def _loads_v1(self, request, data):
|
||||
try:
|
||||
cached = pickle.loads(data)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
return self.prepare_response(request, cached)
|
||||
|
||||
def _loads_v2(self, request, data):
|
||||
try:
|
||||
cached = json.loads(zlib.decompress(data).decode("utf8"))
|
||||
except (ValueError, zlib.error):
|
||||
return
|
||||
|
||||
# We need to decode the items that we've base64 encoded
|
||||
cached["response"]["body"] = _b64_decode_bytes(cached["response"]["body"])
|
||||
cached["response"]["headers"] = dict(
|
||||
(_b64_decode_str(k), _b64_decode_str(v))
|
||||
for k, v in cached["response"]["headers"].items()
|
||||
)
|
||||
cached["response"]["reason"] = _b64_decode_str(cached["response"]["reason"])
|
||||
cached["vary"] = dict(
|
||||
(_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
|
||||
for k, v in cached["vary"].items()
|
||||
)
|
||||
|
||||
return self.prepare_response(request, cached)
|
||||
|
||||
def _loads_v3(self, request, data):
|
||||
# Due to Python 2 encoding issues, it's impossible to know for sure
|
||||
# exactly how to load v3 entries, thus we'll treat these as a miss so
|
||||
# that they get rewritten out as v4 entries.
|
||||
return
|
||||
|
||||
def _loads_v4(self, request, data):
|
||||
try:
|
||||
cached = msgpack.loads(data, raw=False)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
return self.prepare_response(request, cached)
|
||||
29
venv/Lib/site-packages/pip/_vendor/cachecontrol/wrapper.py
Normal file
29
venv/Lib/site-packages/pip/_vendor/cachecontrol/wrapper.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from .adapter import CacheControlAdapter
|
||||
from .cache import DictCache
|
||||
|
||||
|
||||
def CacheControl(
|
||||
sess,
|
||||
cache=None,
|
||||
cache_etags=True,
|
||||
serializer=None,
|
||||
heuristic=None,
|
||||
controller_class=None,
|
||||
adapter_class=None,
|
||||
cacheable_methods=None,
|
||||
):
|
||||
|
||||
cache = DictCache() if cache is None else cache
|
||||
adapter_class = adapter_class or CacheControlAdapter
|
||||
adapter = adapter_class(
|
||||
cache,
|
||||
cache_etags=cache_etags,
|
||||
serializer=serializer,
|
||||
heuristic=heuristic,
|
||||
controller_class=controller_class,
|
||||
cacheable_methods=cacheable_methods,
|
||||
)
|
||||
sess.mount("http://", adapter)
|
||||
sess.mount("https://", adapter)
|
||||
|
||||
return sess
|
||||
3
venv/Lib/site-packages/pip/_vendor/certifi/__init__.py
Normal file
3
venv/Lib/site-packages/pip/_vendor/certifi/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .core import contents, where
|
||||
|
||||
__version__ = "2021.05.30"
|
||||
12
venv/Lib/site-packages/pip/_vendor/certifi/__main__.py
Normal file
12
venv/Lib/site-packages/pip/_vendor/certifi/__main__.py
Normal file
@@ -0,0 +1,12 @@
|
||||
import argparse
|
||||
|
||||
from pip._vendor.certifi import contents, where
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("-c", "--contents", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.contents:
|
||||
print(contents())
|
||||
else:
|
||||
print(where())
|
||||
4257
venv/Lib/site-packages/pip/_vendor/certifi/cacert.pem
Normal file
4257
venv/Lib/site-packages/pip/_vendor/certifi/cacert.pem
Normal file
File diff suppressed because it is too large
Load Diff
76
venv/Lib/site-packages/pip/_vendor/certifi/core.py
Normal file
76
venv/Lib/site-packages/pip/_vendor/certifi/core.py
Normal file
@@ -0,0 +1,76 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
certifi.py
|
||||
~~~~~~~~~~
|
||||
|
||||
This module returns the installation location of cacert.pem or its contents.
|
||||
"""
|
||||
import os
|
||||
|
||||
|
||||
class _PipPatchedCertificate(Exception):
|
||||
pass
|
||||
|
||||
|
||||
try:
|
||||
# Return a certificate file on disk for a standalone pip zipapp running in
|
||||
# an isolated build environment to use. Passing --cert to the standalone
|
||||
# pip does not work since requests calls where() unconditionally on import.
|
||||
_PIP_STANDALONE_CERT = os.environ.get("_PIP_STANDALONE_CERT")
|
||||
if _PIP_STANDALONE_CERT:
|
||||
def where():
|
||||
return _PIP_STANDALONE_CERT
|
||||
raise _PipPatchedCertificate()
|
||||
|
||||
from importlib.resources import path as get_path, read_text
|
||||
|
||||
_CACERT_CTX = None
|
||||
_CACERT_PATH = None
|
||||
|
||||
def where():
|
||||
# This is slightly terrible, but we want to delay extracting the file
|
||||
# in cases where we're inside of a zipimport situation until someone
|
||||
# actually calls where(), but we don't want to re-extract the file
|
||||
# on every call of where(), so we'll do it once then store it in a
|
||||
# global variable.
|
||||
global _CACERT_CTX
|
||||
global _CACERT_PATH
|
||||
if _CACERT_PATH is None:
|
||||
# This is slightly janky, the importlib.resources API wants you to
|
||||
# manage the cleanup of this file, so it doesn't actually return a
|
||||
# path, it returns a context manager that will give you the path
|
||||
# when you enter it and will do any cleanup when you leave it. In
|
||||
# the common case of not needing a temporary file, it will just
|
||||
# return the file system location and the __exit__() is a no-op.
|
||||
#
|
||||
# We also have to hold onto the actual context manager, because
|
||||
# it will do the cleanup whenever it gets garbage collected, so
|
||||
# we will also store that at the global level as well.
|
||||
_CACERT_CTX = get_path("pip._vendor.certifi", "cacert.pem")
|
||||
_CACERT_PATH = str(_CACERT_CTX.__enter__())
|
||||
|
||||
return _CACERT_PATH
|
||||
|
||||
except _PipPatchedCertificate:
|
||||
pass
|
||||
|
||||
except ImportError:
|
||||
# This fallback will work for Python versions prior to 3.7 that lack the
|
||||
# importlib.resources module but relies on the existing `where` function
|
||||
# so won't address issues with environments like PyOxidizer that don't set
|
||||
# __file__ on modules.
|
||||
def read_text(_module, _path, encoding="ascii"):
|
||||
with open(where(), "r", encoding=encoding) as data:
|
||||
return data.read()
|
||||
|
||||
# If we don't have importlib.resources, then we will just do the old logic
|
||||
# of assuming we're on the filesystem and munge the path directly.
|
||||
def where():
|
||||
f = os.path.dirname(__file__)
|
||||
|
||||
return os.path.join(f, "cacert.pem")
|
||||
|
||||
|
||||
def contents():
|
||||
return read_text("certifi", "cacert.pem", encoding="ascii")
|
||||
83
venv/Lib/site-packages/pip/_vendor/chardet/__init__.py
Normal file
83
venv/Lib/site-packages/pip/_vendor/chardet/__init__.py
Normal file
@@ -0,0 +1,83 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
|
||||
from .universaldetector import UniversalDetector
|
||||
from .enums import InputState
|
||||
from .version import __version__, VERSION
|
||||
|
||||
|
||||
__all__ = ['UniversalDetector', 'detect', 'detect_all', '__version__', 'VERSION']
|
||||
|
||||
|
||||
def detect(byte_str):
|
||||
"""
|
||||
Detect the encoding of the given byte string.
|
||||
|
||||
:param byte_str: The byte sequence to examine.
|
||||
:type byte_str: ``bytes`` or ``bytearray``
|
||||
"""
|
||||
if not isinstance(byte_str, bytearray):
|
||||
if not isinstance(byte_str, bytes):
|
||||
raise TypeError('Expected object of type bytes or bytearray, got: '
|
||||
'{}'.format(type(byte_str)))
|
||||
else:
|
||||
byte_str = bytearray(byte_str)
|
||||
detector = UniversalDetector()
|
||||
detector.feed(byte_str)
|
||||
return detector.close()
|
||||
|
||||
|
||||
def detect_all(byte_str):
|
||||
"""
|
||||
Detect all the possible encodings of the given byte string.
|
||||
|
||||
:param byte_str: The byte sequence to examine.
|
||||
:type byte_str: ``bytes`` or ``bytearray``
|
||||
"""
|
||||
if not isinstance(byte_str, bytearray):
|
||||
if not isinstance(byte_str, bytes):
|
||||
raise TypeError('Expected object of type bytes or bytearray, got: '
|
||||
'{}'.format(type(byte_str)))
|
||||
else:
|
||||
byte_str = bytearray(byte_str)
|
||||
|
||||
detector = UniversalDetector()
|
||||
detector.feed(byte_str)
|
||||
detector.close()
|
||||
|
||||
if detector._input_state == InputState.HIGH_BYTE:
|
||||
results = []
|
||||
for prober in detector._charset_probers:
|
||||
if prober.get_confidence() > detector.MINIMUM_THRESHOLD:
|
||||
charset_name = prober.charset_name
|
||||
lower_charset_name = prober.charset_name.lower()
|
||||
# Use Windows encoding name instead of ISO-8859 if we saw any
|
||||
# extra Windows-specific bytes
|
||||
if lower_charset_name.startswith('iso-8859'):
|
||||
if detector._has_win_bytes:
|
||||
charset_name = detector.ISO_WIN_MAP.get(lower_charset_name,
|
||||
charset_name)
|
||||
results.append({
|
||||
'encoding': charset_name,
|
||||
'confidence': prober.get_confidence(),
|
||||
'language': prober.language,
|
||||
})
|
||||
if len(results) > 0:
|
||||
return sorted(results, key=lambda result: -result['confidence'])
|
||||
|
||||
return [detector.result]
|
||||
386
venv/Lib/site-packages/pip/_vendor/chardet/big5freq.py
Normal file
386
venv/Lib/site-packages/pip/_vendor/chardet/big5freq.py
Normal file
@@ -0,0 +1,386 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# Big5 frequency table
|
||||
# by Taiwan's Mandarin Promotion Council
|
||||
# <http://www.edu.tw:81/mandr/>
|
||||
#
|
||||
# 128 --> 0.42261
|
||||
# 256 --> 0.57851
|
||||
# 512 --> 0.74851
|
||||
# 1024 --> 0.89384
|
||||
# 2048 --> 0.97583
|
||||
#
|
||||
# Ideal Distribution Ratio = 0.74851/(1-0.74851) =2.98
|
||||
# Random Distribution Ration = 512/(5401-512)=0.105
|
||||
#
|
||||
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
|
||||
|
||||
BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75
|
||||
|
||||
#Char to FreqOrder table
|
||||
BIG5_TABLE_SIZE = 5376
|
||||
|
||||
BIG5_CHAR_TO_FREQ_ORDER = (
|
||||
1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16
|
||||
3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32
|
||||
1198,3972,3350,4202, 410,2215, 302, 590, 361,1964, 8, 204, 58,4510,5009,1932, # 48
|
||||
63,5010,5011, 317,1614, 75, 222, 159,4203,2417,1480,5012,3555,3091, 224,2822, # 64
|
||||
3682, 3, 10,3973,1471, 29,2787,1135,2866,1940, 873, 130,3275,1123, 312,5013, # 80
|
||||
4511,2052, 507, 252, 682,5014, 142,1915, 124, 206,2947, 34,3556,3204, 64, 604, # 96
|
||||
5015,2501,1977,1978, 155,1991, 645, 641,1606,5016,3452, 337, 72, 406,5017, 80, # 112
|
||||
630, 238,3205,1509, 263, 939,1092,2654, 756,1440,1094,3453, 449, 69,2987, 591, # 128
|
||||
179,2096, 471, 115,2035,1844, 60, 50,2988, 134, 806,1869, 734,2036,3454, 180, # 144
|
||||
995,1607, 156, 537,2907, 688,5018, 319,1305, 779,2145, 514,2379, 298,4512, 359, # 160
|
||||
2502, 90,2716,1338, 663, 11, 906,1099,2553, 20,2441, 182, 532,1716,5019, 732, # 176
|
||||
1376,4204,1311,1420,3206, 25,2317,1056, 113, 399, 382,1950, 242,3455,2474, 529, # 192
|
||||
3276, 475,1447,3683,5020, 117, 21, 656, 810,1297,2300,2334,3557,5021, 126,4205, # 208
|
||||
706, 456, 150, 613,4513, 71,1118,2037,4206, 145,3092, 85, 835, 486,2115,1246, # 224
|
||||
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,5022,2128,2359, 347,3815, 221, # 240
|
||||
3558,3135,5023,1956,1153,4207, 83, 296,1199,3093, 192, 624, 93,5024, 822,1898, # 256
|
||||
2823,3136, 795,2065, 991,1554,1542,1592, 27, 43,2867, 859, 139,1456, 860,4514, # 272
|
||||
437, 712,3974, 164,2397,3137, 695, 211,3037,2097, 195,3975,1608,3559,3560,3684, # 288
|
||||
3976, 234, 811,2989,2098,3977,2233,1441,3561,1615,2380, 668,2077,1638, 305, 228, # 304
|
||||
1664,4515, 467, 415,5025, 262,2099,1593, 239, 108, 300, 200,1033, 512,1247,2078, # 320
|
||||
5026,5027,2176,3207,3685,2682, 593, 845,1062,3277, 88,1723,2038,3978,1951, 212, # 336
|
||||
266, 152, 149, 468,1899,4208,4516, 77, 187,5028,3038, 37, 5,2990,5029,3979, # 352
|
||||
5030,5031, 39,2524,4517,2908,3208,2079, 55, 148, 74,4518, 545, 483,1474,1029, # 368
|
||||
1665, 217,1870,1531,3138,1104,2655,4209, 24, 172,3562, 900,3980,3563,3564,4519, # 384
|
||||
32,1408,2824,1312, 329, 487,2360,2251,2717, 784,2683, 4,3039,3351,1427,1789, # 400
|
||||
188, 109, 499,5032,3686,1717,1790, 888,1217,3040,4520,5033,3565,5034,3352,1520, # 416
|
||||
3687,3981, 196,1034, 775,5035,5036, 929,1816, 249, 439, 38,5037,1063,5038, 794, # 432
|
||||
3982,1435,2301, 46, 178,3278,2066,5039,2381,5040, 214,1709,4521, 804, 35, 707, # 448
|
||||
324,3688,1601,2554, 140, 459,4210,5041,5042,1365, 839, 272, 978,2262,2580,3456, # 464
|
||||
2129,1363,3689,1423, 697, 100,3094, 48, 70,1231, 495,3139,2196,5043,1294,5044, # 480
|
||||
2080, 462, 586,1042,3279, 853, 256, 988, 185,2382,3457,1698, 434,1084,5045,3458, # 496
|
||||
314,2625,2788,4522,2335,2336, 569,2285, 637,1817,2525, 757,1162,1879,1616,3459, # 512
|
||||
287,1577,2116, 768,4523,1671,2868,3566,2526,1321,3816, 909,2418,5046,4211, 933, # 528
|
||||
3817,4212,2053,2361,1222,4524, 765,2419,1322, 786,4525,5047,1920,1462,1677,2909, # 544
|
||||
1699,5048,4526,1424,2442,3140,3690,2600,3353,1775,1941,3460,3983,4213, 309,1369, # 560
|
||||
1130,2825, 364,2234,1653,1299,3984,3567,3985,3986,2656, 525,1085,3041, 902,2001, # 576
|
||||
1475, 964,4527, 421,1845,1415,1057,2286, 940,1364,3141, 376,4528,4529,1381, 7, # 592
|
||||
2527, 983,2383, 336,1710,2684,1846, 321,3461, 559,1131,3042,2752,1809,1132,1313, # 608
|
||||
265,1481,1858,5049, 352,1203,2826,3280, 167,1089, 420,2827, 776, 792,1724,3568, # 624
|
||||
4214,2443,3281,5050,4215,5051, 446, 229, 333,2753, 901,3818,1200,1557,4530,2657, # 640
|
||||
1921, 395,2754,2685,3819,4216,1836, 125, 916,3209,2626,4531,5052,5053,3820,5054, # 656
|
||||
5055,5056,4532,3142,3691,1133,2555,1757,3462,1510,2318,1409,3569,5057,2146, 438, # 672
|
||||
2601,2910,2384,3354,1068, 958,3043, 461, 311,2869,2686,4217,1916,3210,4218,1979, # 688
|
||||
383, 750,2755,2627,4219, 274, 539, 385,1278,1442,5058,1154,1965, 384, 561, 210, # 704
|
||||
98,1295,2556,3570,5059,1711,2420,1482,3463,3987,2911,1257, 129,5060,3821, 642, # 720
|
||||
523,2789,2790,2658,5061, 141,2235,1333, 68, 176, 441, 876, 907,4220, 603,2602, # 736
|
||||
710, 171,3464, 404, 549, 18,3143,2398,1410,3692,1666,5062,3571,4533,2912,4534, # 752
|
||||
5063,2991, 368,5064, 146, 366, 99, 871,3693,1543, 748, 807,1586,1185, 22,2263, # 768
|
||||
379,3822,3211,5065,3212, 505,1942,2628,1992,1382,2319,5066, 380,2362, 218, 702, # 784
|
||||
1818,1248,3465,3044,3572,3355,3282,5067,2992,3694, 930,3283,3823,5068, 59,5069, # 800
|
||||
585, 601,4221, 497,3466,1112,1314,4535,1802,5070,1223,1472,2177,5071, 749,1837, # 816
|
||||
690,1900,3824,1773,3988,1476, 429,1043,1791,2236,2117, 917,4222, 447,1086,1629, # 832
|
||||
5072, 556,5073,5074,2021,1654, 844,1090, 105, 550, 966,1758,2828,1008,1783, 686, # 848
|
||||
1095,5075,2287, 793,1602,5076,3573,2603,4536,4223,2948,2302,4537,3825, 980,2503, # 864
|
||||
544, 353, 527,4538, 908,2687,2913,5077, 381,2629,1943,1348,5078,1341,1252, 560, # 880
|
||||
3095,5079,3467,2870,5080,2054, 973, 886,2081, 143,4539,5081,5082, 157,3989, 496, # 896
|
||||
4224, 57, 840, 540,2039,4540,4541,3468,2118,1445, 970,2264,1748,1966,2082,4225, # 912
|
||||
3144,1234,1776,3284,2829,3695, 773,1206,2130,1066,2040,1326,3990,1738,1725,4226, # 928
|
||||
279,3145, 51,1544,2604, 423,1578,2131,2067, 173,4542,1880,5083,5084,1583, 264, # 944
|
||||
610,3696,4543,2444, 280, 154,5085,5086,5087,1739, 338,1282,3096, 693,2871,1411, # 960
|
||||
1074,3826,2445,5088,4544,5089,5090,1240, 952,2399,5091,2914,1538,2688, 685,1483, # 976
|
||||
4227,2475,1436, 953,4228,2055,4545, 671,2400, 79,4229,2446,3285, 608, 567,2689, # 992
|
||||
3469,4230,4231,1691, 393,1261,1792,2401,5092,4546,5093,5094,5095,5096,1383,1672, # 1008
|
||||
3827,3213,1464, 522,1119, 661,1150, 216, 675,4547,3991,1432,3574, 609,4548,2690, # 1024
|
||||
2402,5097,5098,5099,4232,3045, 0,5100,2476, 315, 231,2447, 301,3356,4549,2385, # 1040
|
||||
5101, 233,4233,3697,1819,4550,4551,5102, 96,1777,1315,2083,5103, 257,5104,1810, # 1056
|
||||
3698,2718,1139,1820,4234,2022,1124,2164,2791,1778,2659,5105,3097, 363,1655,3214, # 1072
|
||||
5106,2993,5107,5108,5109,3992,1567,3993, 718, 103,3215, 849,1443, 341,3357,2949, # 1088
|
||||
1484,5110,1712, 127, 67, 339,4235,2403, 679,1412, 821,5111,5112, 834, 738, 351, # 1104
|
||||
2994,2147, 846, 235,1497,1881, 418,1993,3828,2719, 186,1100,2148,2756,3575,1545, # 1120
|
||||
1355,2950,2872,1377, 583,3994,4236,2581,2995,5113,1298,3699,1078,2557,3700,2363, # 1136
|
||||
78,3829,3830, 267,1289,2100,2002,1594,4237, 348, 369,1274,2197,2178,1838,4552, # 1152
|
||||
1821,2830,3701,2757,2288,2003,4553,2951,2758, 144,3358, 882,4554,3995,2759,3470, # 1168
|
||||
4555,2915,5114,4238,1726, 320,5115,3996,3046, 788,2996,5116,2831,1774,1327,2873, # 1184
|
||||
3997,2832,5117,1306,4556,2004,1700,3831,3576,2364,2660, 787,2023, 506, 824,3702, # 1200
|
||||
534, 323,4557,1044,3359,2024,1901, 946,3471,5118,1779,1500,1678,5119,1882,4558, # 1216
|
||||
165, 243,4559,3703,2528, 123, 683,4239, 764,4560, 36,3998,1793, 589,2916, 816, # 1232
|
||||
626,1667,3047,2237,1639,1555,1622,3832,3999,5120,4000,2874,1370,1228,1933, 891, # 1248
|
||||
2084,2917, 304,4240,5121, 292,2997,2720,3577, 691,2101,4241,1115,4561, 118, 662, # 1264
|
||||
5122, 611,1156, 854,2386,1316,2875, 2, 386, 515,2918,5123,5124,3286, 868,2238, # 1280
|
||||
1486, 855,2661, 785,2216,3048,5125,1040,3216,3578,5126,3146, 448,5127,1525,5128, # 1296
|
||||
2165,4562,5129,3833,5130,4242,2833,3579,3147, 503, 818,4001,3148,1568, 814, 676, # 1312
|
||||
1444, 306,1749,5131,3834,1416,1030, 197,1428, 805,2834,1501,4563,5132,5133,5134, # 1328
|
||||
1994,5135,4564,5136,5137,2198, 13,2792,3704,2998,3149,1229,1917,5138,3835,2132, # 1344
|
||||
5139,4243,4565,2404,3580,5140,2217,1511,1727,1120,5141,5142, 646,3836,2448, 307, # 1360
|
||||
5143,5144,1595,3217,5145,5146,5147,3705,1113,1356,4002,1465,2529,2530,5148, 519, # 1376
|
||||
5149, 128,2133, 92,2289,1980,5150,4003,1512, 342,3150,2199,5151,2793,2218,1981, # 1392
|
||||
3360,4244, 290,1656,1317, 789, 827,2365,5152,3837,4566, 562, 581,4004,5153, 401, # 1408
|
||||
4567,2252, 94,4568,5154,1399,2794,5155,1463,2025,4569,3218,1944,5156, 828,1105, # 1424
|
||||
4245,1262,1394,5157,4246, 605,4570,5158,1784,2876,5159,2835, 819,2102, 578,2200, # 1440
|
||||
2952,5160,1502, 436,3287,4247,3288,2836,4005,2919,3472,3473,5161,2721,2320,5162, # 1456
|
||||
5163,2337,2068, 23,4571, 193, 826,3838,2103, 699,1630,4248,3098, 390,1794,1064, # 1472
|
||||
3581,5164,1579,3099,3100,1400,5165,4249,1839,1640,2877,5166,4572,4573, 137,4250, # 1488
|
||||
598,3101,1967, 780, 104, 974,2953,5167, 278, 899, 253, 402, 572, 504, 493,1339, # 1504
|
||||
5168,4006,1275,4574,2582,2558,5169,3706,3049,3102,2253, 565,1334,2722, 863, 41, # 1520
|
||||
5170,5171,4575,5172,1657,2338, 19, 463,2760,4251, 606,5173,2999,3289,1087,2085, # 1536
|
||||
1323,2662,3000,5174,1631,1623,1750,4252,2691,5175,2878, 791,2723,2663,2339, 232, # 1552
|
||||
2421,5176,3001,1498,5177,2664,2630, 755,1366,3707,3290,3151,2026,1609, 119,1918, # 1568
|
||||
3474, 862,1026,4253,5178,4007,3839,4576,4008,4577,2265,1952,2477,5179,1125, 817, # 1584
|
||||
4254,4255,4009,1513,1766,2041,1487,4256,3050,3291,2837,3840,3152,5180,5181,1507, # 1600
|
||||
5182,2692, 733, 40,1632,1106,2879, 345,4257, 841,2531, 230,4578,3002,1847,3292, # 1616
|
||||
3475,5183,1263, 986,3476,5184, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562, # 1632
|
||||
4010,4011,2954, 967,2761,2665,1349, 592,2134,1692,3361,3003,1995,4258,1679,4012, # 1648
|
||||
1902,2188,5185, 739,3708,2724,1296,1290,5186,4259,2201,2202,1922,1563,2605,2559, # 1664
|
||||
1871,2762,3004,5187, 435,5188, 343,1108, 596, 17,1751,4579,2239,3477,3709,5189, # 1680
|
||||
4580, 294,3582,2955,1693, 477, 979, 281,2042,3583, 643,2043,3710,2631,2795,2266, # 1696
|
||||
1031,2340,2135,2303,3584,4581, 367,1249,2560,5190,3585,5191,4582,1283,3362,2005, # 1712
|
||||
240,1762,3363,4583,4584, 836,1069,3153, 474,5192,2149,2532, 268,3586,5193,3219, # 1728
|
||||
1521,1284,5194,1658,1546,4260,5195,3587,3588,5196,4261,3364,2693,1685,4262, 961, # 1744
|
||||
1673,2632, 190,2006,2203,3841,4585,4586,5197, 570,2504,3711,1490,5198,4587,2633, # 1760
|
||||
3293,1957,4588, 584,1514, 396,1045,1945,5199,4589,1968,2449,5200,5201,4590,4013, # 1776
|
||||
619,5202,3154,3294, 215,2007,2796,2561,3220,4591,3221,4592, 763,4263,3842,4593, # 1792
|
||||
5203,5204,1958,1767,2956,3365,3712,1174, 452,1477,4594,3366,3155,5205,2838,1253, # 1808
|
||||
2387,2189,1091,2290,4264, 492,5206, 638,1169,1825,2136,1752,4014, 648, 926,1021, # 1824
|
||||
1324,4595, 520,4596, 997, 847,1007, 892,4597,3843,2267,1872,3713,2405,1785,4598, # 1840
|
||||
1953,2957,3103,3222,1728,4265,2044,3714,4599,2008,1701,3156,1551, 30,2268,4266, # 1856
|
||||
5207,2027,4600,3589,5208, 501,5209,4267, 594,3478,2166,1822,3590,3479,3591,3223, # 1872
|
||||
829,2839,4268,5210,1680,3157,1225,4269,5211,3295,4601,4270,3158,2341,5212,4602, # 1888
|
||||
4271,5213,4015,4016,5214,1848,2388,2606,3367,5215,4603, 374,4017, 652,4272,4273, # 1904
|
||||
375,1140, 798,5216,5217,5218,2366,4604,2269, 546,1659, 138,3051,2450,4605,5219, # 1920
|
||||
2254, 612,1849, 910, 796,3844,1740,1371, 825,3845,3846,5220,2920,2562,5221, 692, # 1936
|
||||
444,3052,2634, 801,4606,4274,5222,1491, 244,1053,3053,4275,4276, 340,5223,4018, # 1952
|
||||
1041,3005, 293,1168, 87,1357,5224,1539, 959,5225,2240, 721, 694,4277,3847, 219, # 1968
|
||||
1478, 644,1417,3368,2666,1413,1401,1335,1389,4019,5226,5227,3006,2367,3159,1826, # 1984
|
||||
730,1515, 184,2840, 66,4607,5228,1660,2958, 246,3369, 378,1457, 226,3480, 975, # 2000
|
||||
4020,2959,1264,3592, 674, 696,5229, 163,5230,1141,2422,2167, 713,3593,3370,4608, # 2016
|
||||
4021,5231,5232,1186, 15,5233,1079,1070,5234,1522,3224,3594, 276,1050,2725, 758, # 2032
|
||||
1126, 653,2960,3296,5235,2342, 889,3595,4022,3104,3007, 903,1250,4609,4023,3481, # 2048
|
||||
3596,1342,1681,1718, 766,3297, 286, 89,2961,3715,5236,1713,5237,2607,3371,3008, # 2064
|
||||
5238,2962,2219,3225,2880,5239,4610,2505,2533, 181, 387,1075,4024, 731,2190,3372, # 2080
|
||||
5240,3298, 310, 313,3482,2304, 770,4278, 54,3054, 189,4611,3105,3848,4025,5241, # 2096
|
||||
1230,1617,1850, 355,3597,4279,4612,3373, 111,4280,3716,1350,3160,3483,3055,4281, # 2112
|
||||
2150,3299,3598,5242,2797,4026,4027,3009, 722,2009,5243,1071, 247,1207,2343,2478, # 2128
|
||||
1378,4613,2010, 864,1437,1214,4614, 373,3849,1142,2220, 667,4615, 442,2763,2563, # 2144
|
||||
3850,4028,1969,4282,3300,1840, 837, 170,1107, 934,1336,1883,5244,5245,2119,4283, # 2160
|
||||
2841, 743,1569,5246,4616,4284, 582,2389,1418,3484,5247,1803,5248, 357,1395,1729, # 2176
|
||||
3717,3301,2423,1564,2241,5249,3106,3851,1633,4617,1114,2086,4285,1532,5250, 482, # 2192
|
||||
2451,4618,5251,5252,1492, 833,1466,5253,2726,3599,1641,2842,5254,1526,1272,3718, # 2208
|
||||
4286,1686,1795, 416,2564,1903,1954,1804,5255,3852,2798,3853,1159,2321,5256,2881, # 2224
|
||||
4619,1610,1584,3056,2424,2764, 443,3302,1163,3161,5257,5258,4029,5259,4287,2506, # 2240
|
||||
3057,4620,4030,3162,2104,1647,3600,2011,1873,4288,5260,4289, 431,3485,5261, 250, # 2256
|
||||
97, 81,4290,5262,1648,1851,1558, 160, 848,5263, 866, 740,1694,5264,2204,2843, # 2272
|
||||
3226,4291,4621,3719,1687, 950,2479, 426, 469,3227,3720,3721,4031,5265,5266,1188, # 2288
|
||||
424,1996, 861,3601,4292,3854,2205,2694, 168,1235,3602,4293,5267,2087,1674,4622, # 2304
|
||||
3374,3303, 220,2565,1009,5268,3855, 670,3010, 332,1208, 717,5269,5270,3603,2452, # 2320
|
||||
4032,3375,5271, 513,5272,1209,2882,3376,3163,4623,1080,5273,5274,5275,5276,2534, # 2336
|
||||
3722,3604, 815,1587,4033,4034,5277,3605,3486,3856,1254,4624,1328,3058,1390,4035, # 2352
|
||||
1741,4036,3857,4037,5278, 236,3858,2453,3304,5279,5280,3723,3859,1273,3860,4625, # 2368
|
||||
5281, 308,5282,4626, 245,4627,1852,2480,1307,2583, 430, 715,2137,2454,5283, 270, # 2384
|
||||
199,2883,4038,5284,3606,2727,1753, 761,1754, 725,1661,1841,4628,3487,3724,5285, # 2400
|
||||
5286, 587, 14,3305, 227,2608, 326, 480,2270, 943,2765,3607, 291, 650,1884,5287, # 2416
|
||||
1702,1226, 102,1547, 62,3488, 904,4629,3489,1164,4294,5288,5289,1224,1548,2766, # 2432
|
||||
391, 498,1493,5290,1386,1419,5291,2056,1177,4630, 813, 880,1081,2368, 566,1145, # 2448
|
||||
4631,2291,1001,1035,2566,2609,2242, 394,1286,5292,5293,2069,5294, 86,1494,1730, # 2464
|
||||
4039, 491,1588, 745, 897,2963, 843,3377,4040,2767,2884,3306,1768, 998,2221,2070, # 2480
|
||||
397,1827,1195,1970,3725,3011,3378, 284,5295,3861,2507,2138,2120,1904,5296,4041, # 2496
|
||||
2151,4042,4295,1036,3490,1905, 114,2567,4296, 209,1527,5297,5298,2964,2844,2635, # 2512
|
||||
2390,2728,3164, 812,2568,5299,3307,5300,1559, 737,1885,3726,1210, 885, 28,2695, # 2528
|
||||
3608,3862,5301,4297,1004,1780,4632,5302, 346,1982,2222,2696,4633,3863,1742, 797, # 2544
|
||||
1642,4043,1934,1072,1384,2152, 896,4044,3308,3727,3228,2885,3609,5303,2569,1959, # 2560
|
||||
4634,2455,1786,5304,5305,5306,4045,4298,1005,1308,3728,4299,2729,4635,4636,1528, # 2576
|
||||
2610, 161,1178,4300,1983, 987,4637,1101,4301, 631,4046,1157,3229,2425,1343,1241, # 2592
|
||||
1016,2243,2570, 372, 877,2344,2508,1160, 555,1935, 911,4047,5307, 466,1170, 169, # 2608
|
||||
1051,2921,2697,3729,2481,3012,1182,2012,2571,1251,2636,5308, 992,2345,3491,1540, # 2624
|
||||
2730,1201,2071,2406,1997,2482,5309,4638, 528,1923,2191,1503,1874,1570,2369,3379, # 2640
|
||||
3309,5310, 557,1073,5311,1828,3492,2088,2271,3165,3059,3107, 767,3108,2799,4639, # 2656
|
||||
1006,4302,4640,2346,1267,2179,3730,3230, 778,4048,3231,2731,1597,2667,5312,4641, # 2672
|
||||
5313,3493,5314,5315,5316,3310,2698,1433,3311, 131, 95,1504,4049, 723,4303,3166, # 2688
|
||||
1842,3610,2768,2192,4050,2028,2105,3731,5317,3013,4051,1218,5318,3380,3232,4052, # 2704
|
||||
4304,2584, 248,1634,3864, 912,5319,2845,3732,3060,3865, 654, 53,5320,3014,5321, # 2720
|
||||
1688,4642, 777,3494,1032,4053,1425,5322, 191, 820,2121,2846, 971,4643, 931,3233, # 2736
|
||||
135, 664, 783,3866,1998, 772,2922,1936,4054,3867,4644,2923,3234, 282,2732, 640, # 2752
|
||||
1372,3495,1127, 922, 325,3381,5323,5324, 711,2045,5325,5326,4055,2223,2800,1937, # 2768
|
||||
4056,3382,2224,2255,3868,2305,5327,4645,3869,1258,3312,4057,3235,2139,2965,4058, # 2784
|
||||
4059,5328,2225, 258,3236,4646, 101,1227,5329,3313,1755,5330,1391,3314,5331,2924, # 2800
|
||||
2057, 893,5332,5333,5334,1402,4305,2347,5335,5336,3237,3611,5337,5338, 878,1325, # 2816
|
||||
1781,2801,4647, 259,1385,2585, 744,1183,2272,4648,5339,4060,2509,5340, 684,1024, # 2832
|
||||
4306,5341, 472,3612,3496,1165,3315,4061,4062, 322,2153, 881, 455,1695,1152,1340, # 2848
|
||||
660, 554,2154,4649,1058,4650,4307, 830,1065,3383,4063,4651,1924,5342,1703,1919, # 2864
|
||||
5343, 932,2273, 122,5344,4652, 947, 677,5345,3870,2637, 297,1906,1925,2274,4653, # 2880
|
||||
2322,3316,5346,5347,4308,5348,4309, 84,4310, 112, 989,5349, 547,1059,4064, 701, # 2896
|
||||
3613,1019,5350,4311,5351,3497, 942, 639, 457,2306,2456, 993,2966, 407, 851, 494, # 2912
|
||||
4654,3384, 927,5352,1237,5353,2426,3385, 573,4312, 680, 921,2925,1279,1875, 285, # 2928
|
||||
790,1448,1984, 719,2168,5354,5355,4655,4065,4066,1649,5356,1541, 563,5357,1077, # 2944
|
||||
5358,3386,3061,3498, 511,3015,4067,4068,3733,4069,1268,2572,3387,3238,4656,4657, # 2960
|
||||
5359, 535,1048,1276,1189,2926,2029,3167,1438,1373,2847,2967,1134,2013,5360,4313, # 2976
|
||||
1238,2586,3109,1259,5361, 700,5362,2968,3168,3734,4314,5363,4315,1146,1876,1907, # 2992
|
||||
4658,2611,4070, 781,2427, 132,1589, 203, 147, 273,2802,2407, 898,1787,2155,4071, # 3008
|
||||
4072,5364,3871,2803,5365,5366,4659,4660,5367,3239,5368,1635,3872, 965,5369,1805, # 3024
|
||||
2699,1516,3614,1121,1082,1329,3317,4073,1449,3873, 65,1128,2848,2927,2769,1590, # 3040
|
||||
3874,5370,5371, 12,2668, 45, 976,2587,3169,4661, 517,2535,1013,1037,3240,5372, # 3056
|
||||
3875,2849,5373,3876,5374,3499,5375,2612, 614,1999,2323,3877,3110,2733,2638,5376, # 3072
|
||||
2588,4316, 599,1269,5377,1811,3735,5378,2700,3111, 759,1060, 489,1806,3388,3318, # 3088
|
||||
1358,5379,5380,2391,1387,1215,2639,2256, 490,5381,5382,4317,1759,2392,2348,5383, # 3104
|
||||
4662,3878,1908,4074,2640,1807,3241,4663,3500,3319,2770,2349, 874,5384,5385,3501, # 3120
|
||||
3736,1859, 91,2928,3737,3062,3879,4664,5386,3170,4075,2669,5387,3502,1202,1403, # 3136
|
||||
3880,2969,2536,1517,2510,4665,3503,2511,5388,4666,5389,2701,1886,1495,1731,4076, # 3152
|
||||
2370,4667,5390,2030,5391,5392,4077,2702,1216, 237,2589,4318,2324,4078,3881,4668, # 3168
|
||||
4669,2703,3615,3504, 445,4670,5393,5394,5395,5396,2771, 61,4079,3738,1823,4080, # 3184
|
||||
5397, 687,2046, 935, 925, 405,2670, 703,1096,1860,2734,4671,4081,1877,1367,2704, # 3200
|
||||
3389, 918,2106,1782,2483, 334,3320,1611,1093,4672, 564,3171,3505,3739,3390, 945, # 3216
|
||||
2641,2058,4673,5398,1926, 872,4319,5399,3506,2705,3112, 349,4320,3740,4082,4674, # 3232
|
||||
3882,4321,3741,2156,4083,4675,4676,4322,4677,2408,2047, 782,4084, 400, 251,4323, # 3248
|
||||
1624,5400,5401, 277,3742, 299,1265, 476,1191,3883,2122,4324,4325,1109, 205,5402, # 3264
|
||||
2590,1000,2157,3616,1861,5403,5404,5405,4678,5406,4679,2573, 107,2484,2158,4085, # 3280
|
||||
3507,3172,5407,1533, 541,1301, 158, 753,4326,2886,3617,5408,1696, 370,1088,4327, # 3296
|
||||
4680,3618, 579, 327, 440, 162,2244, 269,1938,1374,3508, 968,3063, 56,1396,3113, # 3312
|
||||
2107,3321,3391,5409,1927,2159,4681,3016,5410,3619,5411,5412,3743,4682,2485,5413, # 3328
|
||||
2804,5414,1650,4683,5415,2613,5416,5417,4086,2671,3392,1149,3393,4087,3884,4088, # 3344
|
||||
5418,1076, 49,5419, 951,3242,3322,3323, 450,2850, 920,5420,1812,2805,2371,4328, # 3360
|
||||
1909,1138,2372,3885,3509,5421,3243,4684,1910,1147,1518,2428,4685,3886,5422,4686, # 3376
|
||||
2393,2614, 260,1796,3244,5423,5424,3887,3324, 708,5425,3620,1704,5426,3621,1351, # 3392
|
||||
1618,3394,3017,1887, 944,4329,3395,4330,3064,3396,4331,5427,3744, 422, 413,1714, # 3408
|
||||
3325, 500,2059,2350,4332,2486,5428,1344,1911, 954,5429,1668,5430,5431,4089,2409, # 3424
|
||||
4333,3622,3888,4334,5432,2307,1318,2512,3114, 133,3115,2887,4687, 629, 31,2851, # 3440
|
||||
2706,3889,4688, 850, 949,4689,4090,2970,1732,2089,4335,1496,1853,5433,4091, 620, # 3456
|
||||
3245, 981,1242,3745,3397,1619,3746,1643,3326,2140,2457,1971,1719,3510,2169,5434, # 3472
|
||||
3246,5435,5436,3398,1829,5437,1277,4690,1565,2048,5438,1636,3623,3116,5439, 869, # 3488
|
||||
2852, 655,3890,3891,3117,4092,3018,3892,1310,3624,4691,5440,5441,5442,1733, 558, # 3504
|
||||
4692,3747, 335,1549,3065,1756,4336,3748,1946,3511,1830,1291,1192, 470,2735,2108, # 3520
|
||||
2806, 913,1054,4093,5443,1027,5444,3066,4094,4693, 982,2672,3399,3173,3512,3247, # 3536
|
||||
3248,1947,2807,5445, 571,4694,5446,1831,5447,3625,2591,1523,2429,5448,2090, 984, # 3552
|
||||
4695,3749,1960,5449,3750, 852, 923,2808,3513,3751, 969,1519, 999,2049,2325,1705, # 3568
|
||||
5450,3118, 615,1662, 151, 597,4095,2410,2326,1049, 275,4696,3752,4337, 568,3753, # 3584
|
||||
3626,2487,4338,3754,5451,2430,2275, 409,3249,5452,1566,2888,3514,1002, 769,2853, # 3600
|
||||
194,2091,3174,3755,2226,3327,4339, 628,1505,5453,5454,1763,2180,3019,4096, 521, # 3616
|
||||
1161,2592,1788,2206,2411,4697,4097,1625,4340,4341, 412, 42,3119, 464,5455,2642, # 3632
|
||||
4698,3400,1760,1571,2889,3515,2537,1219,2207,3893,2643,2141,2373,4699,4700,3328, # 3648
|
||||
1651,3401,3627,5456,5457,3628,2488,3516,5458,3756,5459,5460,2276,2092, 460,5461, # 3664
|
||||
4701,5462,3020, 962, 588,3629, 289,3250,2644,1116, 52,5463,3067,1797,5464,5465, # 3680
|
||||
5466,1467,5467,1598,1143,3757,4342,1985,1734,1067,4702,1280,3402, 465,4703,1572, # 3696
|
||||
510,5468,1928,2245,1813,1644,3630,5469,4704,3758,5470,5471,2673,1573,1534,5472, # 3712
|
||||
5473, 536,1808,1761,3517,3894,3175,2645,5474,5475,5476,4705,3518,2929,1912,2809, # 3728
|
||||
5477,3329,1122, 377,3251,5478, 360,5479,5480,4343,1529, 551,5481,2060,3759,1769, # 3744
|
||||
2431,5482,2930,4344,3330,3120,2327,2109,2031,4706,1404, 136,1468,1479, 672,1171, # 3760
|
||||
3252,2308, 271,3176,5483,2772,5484,2050, 678,2736, 865,1948,4707,5485,2014,4098, # 3776
|
||||
2971,5486,2737,2227,1397,3068,3760,4708,4709,1735,2931,3403,3631,5487,3895, 509, # 3792
|
||||
2854,2458,2890,3896,5488,5489,3177,3178,4710,4345,2538,4711,2309,1166,1010, 552, # 3808
|
||||
681,1888,5490,5491,2972,2973,4099,1287,1596,1862,3179, 358, 453, 736, 175, 478, # 3824
|
||||
1117, 905,1167,1097,5492,1854,1530,5493,1706,5494,2181,3519,2292,3761,3520,3632, # 3840
|
||||
4346,2093,4347,5495,3404,1193,2489,4348,1458,2193,2208,1863,1889,1421,3331,2932, # 3856
|
||||
3069,2182,3521, 595,2123,5496,4100,5497,5498,4349,1707,2646, 223,3762,1359, 751, # 3872
|
||||
3121, 183,3522,5499,2810,3021, 419,2374, 633, 704,3897,2394, 241,5500,5501,5502, # 3888
|
||||
838,3022,3763,2277,2773,2459,3898,1939,2051,4101,1309,3122,2246,1181,5503,1136, # 3904
|
||||
2209,3899,2375,1446,4350,2310,4712,5504,5505,4351,1055,2615, 484,3764,5506,4102, # 3920
|
||||
625,4352,2278,3405,1499,4353,4103,5507,4104,4354,3253,2279,2280,3523,5508,5509, # 3936
|
||||
2774, 808,2616,3765,3406,4105,4355,3123,2539, 526,3407,3900,4356, 955,5510,1620, # 3952
|
||||
4357,2647,2432,5511,1429,3766,1669,1832, 994, 928,5512,3633,1260,5513,5514,5515, # 3968
|
||||
1949,2293, 741,2933,1626,4358,2738,2460, 867,1184, 362,3408,1392,5516,5517,4106, # 3984
|
||||
4359,1770,1736,3254,2934,4713,4714,1929,2707,1459,1158,5518,3070,3409,2891,1292, # 4000
|
||||
1930,2513,2855,3767,1986,1187,2072,2015,2617,4360,5519,2574,2514,2170,3768,2490, # 4016
|
||||
3332,5520,3769,4715,5521,5522, 666,1003,3023,1022,3634,4361,5523,4716,1814,2257, # 4032
|
||||
574,3901,1603, 295,1535, 705,3902,4362, 283, 858, 417,5524,5525,3255,4717,4718, # 4048
|
||||
3071,1220,1890,1046,2281,2461,4107,1393,1599, 689,2575, 388,4363,5526,2491, 802, # 4064
|
||||
5527,2811,3903,2061,1405,2258,5528,4719,3904,2110,1052,1345,3256,1585,5529, 809, # 4080
|
||||
5530,5531,5532, 575,2739,3524, 956,1552,1469,1144,2328,5533,2329,1560,2462,3635, # 4096
|
||||
3257,4108, 616,2210,4364,3180,2183,2294,5534,1833,5535,3525,4720,5536,1319,3770, # 4112
|
||||
3771,1211,3636,1023,3258,1293,2812,5537,5538,5539,3905, 607,2311,3906, 762,2892, # 4128
|
||||
1439,4365,1360,4721,1485,3072,5540,4722,1038,4366,1450,2062,2648,4367,1379,4723, # 4144
|
||||
2593,5541,5542,4368,1352,1414,2330,2935,1172,5543,5544,3907,3908,4724,1798,1451, # 4160
|
||||
5545,5546,5547,5548,2936,4109,4110,2492,2351, 411,4111,4112,3637,3333,3124,4725, # 4176
|
||||
1561,2674,1452,4113,1375,5549,5550, 47,2974, 316,5551,1406,1591,2937,3181,5552, # 4192
|
||||
1025,2142,3125,3182, 354,2740, 884,2228,4369,2412, 508,3772, 726,3638, 996,2433, # 4208
|
||||
3639, 729,5553, 392,2194,1453,4114,4726,3773,5554,5555,2463,3640,2618,1675,2813, # 4224
|
||||
919,2352,2975,2353,1270,4727,4115, 73,5556,5557, 647,5558,3259,2856,2259,1550, # 4240
|
||||
1346,3024,5559,1332, 883,3526,5560,5561,5562,5563,3334,2775,5564,1212, 831,1347, # 4256
|
||||
4370,4728,2331,3909,1864,3073, 720,3910,4729,4730,3911,5565,4371,5566,5567,4731, # 4272
|
||||
5568,5569,1799,4732,3774,2619,4733,3641,1645,2376,4734,5570,2938, 669,2211,2675, # 4288
|
||||
2434,5571,2893,5572,5573,1028,3260,5574,4372,2413,5575,2260,1353,5576,5577,4735, # 4304
|
||||
3183, 518,5578,4116,5579,4373,1961,5580,2143,4374,5581,5582,3025,2354,2355,3912, # 4320
|
||||
516,1834,1454,4117,2708,4375,4736,2229,2620,1972,1129,3642,5583,2776,5584,2976, # 4336
|
||||
1422, 577,1470,3026,1524,3410,5585,5586, 432,4376,3074,3527,5587,2594,1455,2515, # 4352
|
||||
2230,1973,1175,5588,1020,2741,4118,3528,4737,5589,2742,5590,1743,1361,3075,3529, # 4368
|
||||
2649,4119,4377,4738,2295, 895, 924,4378,2171, 331,2247,3076, 166,1627,3077,1098, # 4384
|
||||
5591,1232,2894,2231,3411,4739, 657, 403,1196,2377, 542,3775,3412,1600,4379,3530, # 4400
|
||||
5592,4740,2777,3261, 576, 530,1362,4741,4742,2540,2676,3776,4120,5593, 842,3913, # 4416
|
||||
5594,2814,2032,1014,4121, 213,2709,3413, 665, 621,4380,5595,3777,2939,2435,5596, # 4432
|
||||
2436,3335,3643,3414,4743,4381,2541,4382,4744,3644,1682,4383,3531,1380,5597, 724, # 4448
|
||||
2282, 600,1670,5598,1337,1233,4745,3126,2248,5599,1621,4746,5600, 651,4384,5601, # 4464
|
||||
1612,4385,2621,5602,2857,5603,2743,2312,3078,5604, 716,2464,3079, 174,1255,2710, # 4480
|
||||
4122,3645, 548,1320,1398, 728,4123,1574,5605,1891,1197,3080,4124,5606,3081,3082, # 4496
|
||||
3778,3646,3779, 747,5607, 635,4386,4747,5608,5609,5610,4387,5611,5612,4748,5613, # 4512
|
||||
3415,4749,2437, 451,5614,3780,2542,2073,4388,2744,4389,4125,5615,1764,4750,5616, # 4528
|
||||
4390, 350,4751,2283,2395,2493,5617,4391,4126,2249,1434,4127, 488,4752, 458,4392, # 4544
|
||||
4128,3781, 771,1330,2396,3914,2576,3184,2160,2414,1553,2677,3185,4393,5618,2494, # 4560
|
||||
2895,2622,1720,2711,4394,3416,4753,5619,2543,4395,5620,3262,4396,2778,5621,2016, # 4576
|
||||
2745,5622,1155,1017,3782,3915,5623,3336,2313, 201,1865,4397,1430,5624,4129,5625, # 4592
|
||||
5626,5627,5628,5629,4398,1604,5630, 414,1866, 371,2595,4754,4755,3532,2017,3127, # 4608
|
||||
4756,1708, 960,4399, 887, 389,2172,1536,1663,1721,5631,2232,4130,2356,2940,1580, # 4624
|
||||
5632,5633,1744,4757,2544,4758,4759,5634,4760,5635,2074,5636,4761,3647,3417,2896, # 4640
|
||||
4400,5637,4401,2650,3418,2815, 673,2712,2465, 709,3533,4131,3648,4402,5638,1148, # 4656
|
||||
502, 634,5639,5640,1204,4762,3649,1575,4763,2623,3783,5641,3784,3128, 948,3263, # 4672
|
||||
121,1745,3916,1110,5642,4403,3083,2516,3027,4132,3785,1151,1771,3917,1488,4133, # 4688
|
||||
1987,5643,2438,3534,5644,5645,2094,5646,4404,3918,1213,1407,2816, 531,2746,2545, # 4704
|
||||
3264,1011,1537,4764,2779,4405,3129,1061,5647,3786,3787,1867,2897,5648,2018, 120, # 4720
|
||||
4406,4407,2063,3650,3265,2314,3919,2678,3419,1955,4765,4134,5649,3535,1047,2713, # 4736
|
||||
1266,5650,1368,4766,2858, 649,3420,3920,2546,2747,1102,2859,2679,5651,5652,2000, # 4752
|
||||
5653,1111,3651,2977,5654,2495,3921,3652,2817,1855,3421,3788,5655,5656,3422,2415, # 4768
|
||||
2898,3337,3266,3653,5657,2577,5658,3654,2818,4135,1460, 856,5659,3655,5660,2899, # 4784
|
||||
2978,5661,2900,3922,5662,4408, 632,2517, 875,3923,1697,3924,2296,5663,5664,4767, # 4800
|
||||
3028,1239, 580,4768,4409,5665, 914, 936,2075,1190,4136,1039,2124,5666,5667,5668, # 4816
|
||||
5669,3423,1473,5670,1354,4410,3925,4769,2173,3084,4137, 915,3338,4411,4412,3339, # 4832
|
||||
1605,1835,5671,2748, 398,3656,4413,3926,4138, 328,1913,2860,4139,3927,1331,4414, # 4848
|
||||
3029, 937,4415,5672,3657,4140,4141,3424,2161,4770,3425, 524, 742, 538,3085,1012, # 4864
|
||||
5673,5674,3928,2466,5675, 658,1103, 225,3929,5676,5677,4771,5678,4772,5679,3267, # 4880
|
||||
1243,5680,4142, 963,2250,4773,5681,2714,3658,3186,5682,5683,2596,2332,5684,4774, # 4896
|
||||
5685,5686,5687,3536, 957,3426,2547,2033,1931,2941,2467, 870,2019,3659,1746,2780, # 4912
|
||||
2781,2439,2468,5688,3930,5689,3789,3130,3790,3537,3427,3791,5690,1179,3086,5691, # 4928
|
||||
3187,2378,4416,3792,2548,3188,3131,2749,4143,5692,3428,1556,2549,2297, 977,2901, # 4944
|
||||
2034,4144,1205,3429,5693,1765,3430,3189,2125,1271, 714,1689,4775,3538,5694,2333, # 4960
|
||||
3931, 533,4417,3660,2184, 617,5695,2469,3340,3539,2315,5696,5697,3190,5698,5699, # 4976
|
||||
3932,1988, 618, 427,2651,3540,3431,5700,5701,1244,1690,5702,2819,4418,4776,5703, # 4992
|
||||
3541,4777,5704,2284,1576, 473,3661,4419,3432, 972,5705,3662,5706,3087,5707,5708, # 5008
|
||||
4778,4779,5709,3793,4145,4146,5710, 153,4780, 356,5711,1892,2902,4420,2144, 408, # 5024
|
||||
803,2357,5712,3933,5713,4421,1646,2578,2518,4781,4782,3934,5714,3935,4422,5715, # 5040
|
||||
2416,3433, 752,5716,5717,1962,3341,2979,5718, 746,3030,2470,4783,4423,3794, 698, # 5056
|
||||
4784,1893,4424,3663,2550,4785,3664,3936,5719,3191,3434,5720,1824,1302,4147,2715, # 5072
|
||||
3937,1974,4425,5721,4426,3192, 823,1303,1288,1236,2861,3542,4148,3435, 774,3938, # 5088
|
||||
5722,1581,4786,1304,2862,3939,4787,5723,2440,2162,1083,3268,4427,4149,4428, 344, # 5104
|
||||
1173, 288,2316, 454,1683,5724,5725,1461,4788,4150,2597,5726,5727,4789, 985, 894, # 5120
|
||||
5728,3436,3193,5729,1914,2942,3795,1989,5730,2111,1975,5731,4151,5732,2579,1194, # 5136
|
||||
425,5733,4790,3194,1245,3796,4429,5734,5735,2863,5736, 636,4791,1856,3940, 760, # 5152
|
||||
1800,5737,4430,2212,1508,4792,4152,1894,1684,2298,5738,5739,4793,4431,4432,2213, # 5168
|
||||
479,5740,5741, 832,5742,4153,2496,5743,2980,2497,3797, 990,3132, 627,1815,2652, # 5184
|
||||
4433,1582,4434,2126,2112,3543,4794,5744, 799,4435,3195,5745,4795,2113,1737,3031, # 5200
|
||||
1018, 543, 754,4436,3342,1676,4796,4797,4154,4798,1489,5746,3544,5747,2624,2903, # 5216
|
||||
4155,5748,5749,2981,5750,5751,5752,5753,3196,4799,4800,2185,1722,5754,3269,3270, # 5232
|
||||
1843,3665,1715, 481, 365,1976,1857,5755,5756,1963,2498,4801,5757,2127,3666,3271, # 5248
|
||||
433,1895,2064,2076,5758, 602,2750,5759,5760,5761,5762,5763,3032,1628,3437,5764, # 5264
|
||||
3197,4802,4156,2904,4803,2519,5765,2551,2782,5766,5767,5768,3343,4804,2905,5769, # 5280
|
||||
4805,5770,2864,4806,4807,1221,2982,4157,2520,5771,5772,5773,1868,1990,5774,5775, # 5296
|
||||
5776,1896,5777,5778,4808,1897,4158, 318,5779,2095,4159,4437,5780,5781, 485,5782, # 5312
|
||||
938,3941, 553,2680, 116,5783,3942,3667,5784,3545,2681,2783,3438,3344,2820,5785, # 5328
|
||||
3668,2943,4160,1747,2944,2983,5786,5787, 207,5788,4809,5789,4810,2521,5790,3033, # 5344
|
||||
890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360
|
||||
2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376
|
||||
)
|
||||
|
||||
47
venv/Lib/site-packages/pip/_vendor/chardet/big5prober.py
Normal file
47
venv/Lib/site-packages/pip/_vendor/chardet/big5prober.py
Normal file
@@ -0,0 +1,47 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import Big5DistributionAnalysis
|
||||
from .mbcssm import BIG5_SM_MODEL
|
||||
|
||||
|
||||
class Big5Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(Big5Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
|
||||
self.distribution_analyzer = Big5DistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "Big5"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Chinese"
|
||||
233
venv/Lib/site-packages/pip/_vendor/chardet/chardistribution.py
Normal file
233
venv/Lib/site-packages/pip/_vendor/chardet/chardistribution.py
Normal file
@@ -0,0 +1,233 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE,
|
||||
EUCTW_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE,
|
||||
EUCKR_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE,
|
||||
GB2312_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE,
|
||||
BIG5_TYPICAL_DISTRIBUTION_RATIO)
|
||||
from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE,
|
||||
JIS_TYPICAL_DISTRIBUTION_RATIO)
|
||||
|
||||
|
||||
class CharDistributionAnalysis(object):
|
||||
ENOUGH_DATA_THRESHOLD = 1024
|
||||
SURE_YES = 0.99
|
||||
SURE_NO = 0.01
|
||||
MINIMUM_DATA_THRESHOLD = 3
|
||||
|
||||
def __init__(self):
|
||||
# Mapping table to get frequency order from char order (get from
|
||||
# GetOrder())
|
||||
self._char_to_freq_order = None
|
||||
self._table_size = None # Size of above table
|
||||
# This is a constant value which varies from language to language,
|
||||
# used in calculating confidence. See
|
||||
# http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
|
||||
# for further detail.
|
||||
self.typical_distribution_ratio = None
|
||||
self._done = None
|
||||
self._total_chars = None
|
||||
self._freq_chars = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
"""reset analyser, clear any state"""
|
||||
# If this flag is set to True, detection is done and conclusion has
|
||||
# been made
|
||||
self._done = False
|
||||
self._total_chars = 0 # Total characters encountered
|
||||
# The number of characters whose frequency order is less than 512
|
||||
self._freq_chars = 0
|
||||
|
||||
def feed(self, char, char_len):
|
||||
"""feed a character with known length"""
|
||||
if char_len == 2:
|
||||
# we only care about 2-bytes character in our distribution analysis
|
||||
order = self.get_order(char)
|
||||
else:
|
||||
order = -1
|
||||
if order >= 0:
|
||||
self._total_chars += 1
|
||||
# order is valid
|
||||
if order < self._table_size:
|
||||
if 512 > self._char_to_freq_order[order]:
|
||||
self._freq_chars += 1
|
||||
|
||||
def get_confidence(self):
|
||||
"""return confidence based on existing data"""
|
||||
# if we didn't receive any character in our consideration range,
|
||||
# return negative answer
|
||||
if self._total_chars <= 0 or self._freq_chars <= self.MINIMUM_DATA_THRESHOLD:
|
||||
return self.SURE_NO
|
||||
|
||||
if self._total_chars != self._freq_chars:
|
||||
r = (self._freq_chars / ((self._total_chars - self._freq_chars)
|
||||
* self.typical_distribution_ratio))
|
||||
if r < self.SURE_YES:
|
||||
return r
|
||||
|
||||
# normalize confidence (we don't want to be 100% sure)
|
||||
return self.SURE_YES
|
||||
|
||||
def got_enough_data(self):
|
||||
# It is not necessary to receive all data to draw conclusion.
|
||||
# For charset detection, certain amount of data is enough
|
||||
return self._total_chars > self.ENOUGH_DATA_THRESHOLD
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# We do not handle characters based on the original encoding string,
|
||||
# but convert this encoding string to a number, here called order.
|
||||
# This allows multiple encodings of a language to share one frequency
|
||||
# table.
|
||||
return -1
|
||||
|
||||
|
||||
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(EUCTWDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = EUCTW_TABLE_SIZE
|
||||
self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for euc-TW encoding, we are interested
|
||||
# first byte range: 0xc4 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char = byte_str[0]
|
||||
if first_char >= 0xC4:
|
||||
return 94 * (first_char - 0xC4) + byte_str[1] - 0xA1
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(EUCKRDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = EUCKR_TABLE_SIZE
|
||||
self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for euc-KR encoding, we are interested
|
||||
# first byte range: 0xb0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char = byte_str[0]
|
||||
if first_char >= 0xB0:
|
||||
return 94 * (first_char - 0xB0) + byte_str[1] - 0xA1
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class GB2312DistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(GB2312DistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = GB2312_TABLE_SIZE
|
||||
self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for GB2312 encoding, we are interested
|
||||
# first byte range: 0xb0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char, second_char = byte_str[0], byte_str[1]
|
||||
if (first_char >= 0xB0) and (second_char >= 0xA1):
|
||||
return 94 * (first_char - 0xB0) + second_char - 0xA1
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class Big5DistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(Big5DistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = BIG5_TABLE_SIZE
|
||||
self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for big5 encoding, we are interested
|
||||
# first byte range: 0xa4 -- 0xfe
|
||||
# second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char, second_char = byte_str[0], byte_str[1]
|
||||
if first_char >= 0xA4:
|
||||
if second_char >= 0xA1:
|
||||
return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63
|
||||
else:
|
||||
return 157 * (first_char - 0xA4) + second_char - 0x40
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
class SJISDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(SJISDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = JIS_TABLE_SIZE
|
||||
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for sjis encoding, we are interested
|
||||
# first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe
|
||||
# second byte range: 0x40 -- 0x7e, 0x81 -- oxfe
|
||||
# no validation needed here. State machine has done that
|
||||
first_char, second_char = byte_str[0], byte_str[1]
|
||||
if (first_char >= 0x81) and (first_char <= 0x9F):
|
||||
order = 188 * (first_char - 0x81)
|
||||
elif (first_char >= 0xE0) and (first_char <= 0xEF):
|
||||
order = 188 * (first_char - 0xE0 + 31)
|
||||
else:
|
||||
return -1
|
||||
order = order + second_char - 0x40
|
||||
if second_char > 0x7F:
|
||||
order = -1
|
||||
return order
|
||||
|
||||
|
||||
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
|
||||
def __init__(self):
|
||||
super(EUCJPDistributionAnalysis, self).__init__()
|
||||
self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER
|
||||
self._table_size = JIS_TABLE_SIZE
|
||||
self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO
|
||||
|
||||
def get_order(self, byte_str):
|
||||
# for euc-JP encoding, we are interested
|
||||
# first byte range: 0xa0 -- 0xfe
|
||||
# second byte range: 0xa1 -- 0xfe
|
||||
# no validation needed here. State machine has done that
|
||||
char = byte_str[0]
|
||||
if char >= 0xA0:
|
||||
return 94 * (char - 0xA1) + byte_str[1] - 0xa1
|
||||
else:
|
||||
return -1
|
||||
107
venv/Lib/site-packages/pip/_vendor/chardet/charsetgroupprober.py
Normal file
107
venv/Lib/site-packages/pip/_vendor/chardet/charsetgroupprober.py
Normal file
@@ -0,0 +1,107 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import ProbingState
|
||||
from .charsetprober import CharSetProber
|
||||
|
||||
|
||||
class CharSetGroupProber(CharSetProber):
|
||||
def __init__(self, lang_filter=None):
|
||||
super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
|
||||
self._active_num = 0
|
||||
self.probers = []
|
||||
self._best_guess_prober = None
|
||||
|
||||
def reset(self):
|
||||
super(CharSetGroupProber, self).reset()
|
||||
self._active_num = 0
|
||||
for prober in self.probers:
|
||||
if prober:
|
||||
prober.reset()
|
||||
prober.active = True
|
||||
self._active_num += 1
|
||||
self._best_guess_prober = None
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
if not self._best_guess_prober:
|
||||
self.get_confidence()
|
||||
if not self._best_guess_prober:
|
||||
return None
|
||||
return self._best_guess_prober.charset_name
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
if not self._best_guess_prober:
|
||||
self.get_confidence()
|
||||
if not self._best_guess_prober:
|
||||
return None
|
||||
return self._best_guess_prober.language
|
||||
|
||||
def feed(self, byte_str):
|
||||
for prober in self.probers:
|
||||
if not prober:
|
||||
continue
|
||||
if not prober.active:
|
||||
continue
|
||||
state = prober.feed(byte_str)
|
||||
if not state:
|
||||
continue
|
||||
if state == ProbingState.FOUND_IT:
|
||||
self._best_guess_prober = prober
|
||||
self._state = ProbingState.FOUND_IT
|
||||
return self.state
|
||||
elif state == ProbingState.NOT_ME:
|
||||
prober.active = False
|
||||
self._active_num -= 1
|
||||
if self._active_num <= 0:
|
||||
self._state = ProbingState.NOT_ME
|
||||
return self.state
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
state = self.state
|
||||
if state == ProbingState.FOUND_IT:
|
||||
return 0.99
|
||||
elif state == ProbingState.NOT_ME:
|
||||
return 0.01
|
||||
best_conf = 0.0
|
||||
self._best_guess_prober = None
|
||||
for prober in self.probers:
|
||||
if not prober:
|
||||
continue
|
||||
if not prober.active:
|
||||
self.logger.debug('%s not active', prober.charset_name)
|
||||
continue
|
||||
conf = prober.get_confidence()
|
||||
self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
|
||||
if best_conf < conf:
|
||||
best_conf = conf
|
||||
self._best_guess_prober = prober
|
||||
if not self._best_guess_prober:
|
||||
return 0.0
|
||||
return best_conf
|
||||
145
venv/Lib/site-packages/pip/_vendor/chardet/charsetprober.py
Normal file
145
venv/Lib/site-packages/pip/_vendor/chardet/charsetprober.py
Normal file
@@ -0,0 +1,145 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from .enums import ProbingState
|
||||
|
||||
|
||||
class CharSetProber(object):
|
||||
|
||||
SHORTCUT_THRESHOLD = 0.95
|
||||
|
||||
def __init__(self, lang_filter=None):
|
||||
self._state = None
|
||||
self.lang_filter = lang_filter
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
def reset(self):
|
||||
self._state = ProbingState.DETECTING
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return None
|
||||
|
||||
def feed(self, buf):
|
||||
pass
|
||||
|
||||
@property
|
||||
def state(self):
|
||||
return self._state
|
||||
|
||||
def get_confidence(self):
|
||||
return 0.0
|
||||
|
||||
@staticmethod
|
||||
def filter_high_byte_only(buf):
|
||||
buf = re.sub(b'([\x00-\x7F])+', b' ', buf)
|
||||
return buf
|
||||
|
||||
@staticmethod
|
||||
def filter_international_words(buf):
|
||||
"""
|
||||
We define three types of bytes:
|
||||
alphabet: english alphabets [a-zA-Z]
|
||||
international: international characters [\x80-\xFF]
|
||||
marker: everything else [^a-zA-Z\x80-\xFF]
|
||||
|
||||
The input buffer can be thought to contain a series of words delimited
|
||||
by markers. This function works to filter all words that contain at
|
||||
least one international character. All contiguous sequences of markers
|
||||
are replaced by a single space ascii character.
|
||||
|
||||
This filter applies to all scripts which do not use English characters.
|
||||
"""
|
||||
filtered = bytearray()
|
||||
|
||||
# This regex expression filters out only words that have at-least one
|
||||
# international character. The word may include one marker character at
|
||||
# the end.
|
||||
words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?',
|
||||
buf)
|
||||
|
||||
for word in words:
|
||||
filtered.extend(word[:-1])
|
||||
|
||||
# If the last character in the word is a marker, replace it with a
|
||||
# space as markers shouldn't affect our analysis (they are used
|
||||
# similarly across all languages and may thus have similar
|
||||
# frequencies).
|
||||
last_char = word[-1:]
|
||||
if not last_char.isalpha() and last_char < b'\x80':
|
||||
last_char = b' '
|
||||
filtered.extend(last_char)
|
||||
|
||||
return filtered
|
||||
|
||||
@staticmethod
|
||||
def filter_with_english_letters(buf):
|
||||
"""
|
||||
Returns a copy of ``buf`` that retains only the sequences of English
|
||||
alphabet and high byte characters that are not between <> characters.
|
||||
Also retains English alphabet and high byte characters immediately
|
||||
before occurrences of >.
|
||||
|
||||
This filter can be applied to all scripts which contain both English
|
||||
characters and extended ASCII characters, but is currently only used by
|
||||
``Latin1Prober``.
|
||||
"""
|
||||
filtered = bytearray()
|
||||
in_tag = False
|
||||
prev = 0
|
||||
|
||||
for curr in range(len(buf)):
|
||||
# Slice here to get bytes instead of an int with Python 3
|
||||
buf_char = buf[curr:curr + 1]
|
||||
# Check if we're coming out of or entering an HTML tag
|
||||
if buf_char == b'>':
|
||||
in_tag = False
|
||||
elif buf_char == b'<':
|
||||
in_tag = True
|
||||
|
||||
# If current character is not extended-ASCII and not alphabetic...
|
||||
if buf_char < b'\x80' and not buf_char.isalpha():
|
||||
# ...and we're not in a tag
|
||||
if curr > prev and not in_tag:
|
||||
# Keep everything after last non-extended-ASCII,
|
||||
# non-alphabetic character
|
||||
filtered.extend(buf[prev:curr])
|
||||
# Output a space to delimit stretch we kept
|
||||
filtered.extend(b' ')
|
||||
prev = curr + 1
|
||||
|
||||
# If we're not in a tag...
|
||||
if not in_tag:
|
||||
# Keep everything after last non-extended-ASCII, non-alphabetic
|
||||
# character
|
||||
filtered.extend(buf[prev:])
|
||||
|
||||
return filtered
|
||||
@@ -0,0 +1 @@
|
||||
|
||||
84
venv/Lib/site-packages/pip/_vendor/chardet/cli/chardetect.py
Normal file
84
venv/Lib/site-packages/pip/_vendor/chardet/cli/chardetect.py
Normal file
@@ -0,0 +1,84 @@
|
||||
"""
|
||||
Script which takes one or more file paths and reports on their detected
|
||||
encodings
|
||||
|
||||
Example::
|
||||
|
||||
% chardetect somefile someotherfile
|
||||
somefile: windows-1252 with confidence 0.5
|
||||
someotherfile: ascii with confidence 1.0
|
||||
|
||||
If no paths are provided, it takes its input from stdin.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, print_function, unicode_literals
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from pip._vendor.chardet import __version__
|
||||
from pip._vendor.chardet.compat import PY2
|
||||
from pip._vendor.chardet.universaldetector import UniversalDetector
|
||||
|
||||
|
||||
def description_of(lines, name='stdin'):
|
||||
"""
|
||||
Return a string describing the probable encoding of a file or
|
||||
list of strings.
|
||||
|
||||
:param lines: The lines to get the encoding of.
|
||||
:type lines: Iterable of bytes
|
||||
:param name: Name of file or collection of lines
|
||||
:type name: str
|
||||
"""
|
||||
u = UniversalDetector()
|
||||
for line in lines:
|
||||
line = bytearray(line)
|
||||
u.feed(line)
|
||||
# shortcut out of the loop to save reading further - particularly useful if we read a BOM.
|
||||
if u.done:
|
||||
break
|
||||
u.close()
|
||||
result = u.result
|
||||
if PY2:
|
||||
name = name.decode(sys.getfilesystemencoding(), 'ignore')
|
||||
if result['encoding']:
|
||||
return '{}: {} with confidence {}'.format(name, result['encoding'],
|
||||
result['confidence'])
|
||||
else:
|
||||
return '{}: no result'.format(name)
|
||||
|
||||
|
||||
def main(argv=None):
|
||||
"""
|
||||
Handles command line arguments and gets things started.
|
||||
|
||||
:param argv: List of arguments, as if specified on the command-line.
|
||||
If None, ``sys.argv[1:]`` is used instead.
|
||||
:type argv: list of str
|
||||
"""
|
||||
# Get command line arguments
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Takes one or more file paths and reports their detected \
|
||||
encodings")
|
||||
parser.add_argument('input',
|
||||
help='File whose encoding we would like to determine. \
|
||||
(default: stdin)',
|
||||
type=argparse.FileType('rb'), nargs='*',
|
||||
default=[sys.stdin if PY2 else sys.stdin.buffer])
|
||||
parser.add_argument('--version', action='version',
|
||||
version='%(prog)s {}'.format(__version__))
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
for f in args.input:
|
||||
if f.isatty():
|
||||
print("You are running chardetect interactively. Press " +
|
||||
"CTRL-D twice at the start of a blank line to signal the " +
|
||||
"end of your input. If you want help, run chardetect " +
|
||||
"--help\n", file=sys.stderr)
|
||||
print(description_of(f, f.name))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -0,0 +1,88 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import logging
|
||||
|
||||
from .enums import MachineState
|
||||
|
||||
|
||||
class CodingStateMachine(object):
|
||||
"""
|
||||
A state machine to verify a byte sequence for a particular encoding. For
|
||||
each byte the detector receives, it will feed that byte to every active
|
||||
state machine available, one byte at a time. The state machine changes its
|
||||
state based on its previous state and the byte it receives. There are 3
|
||||
states in a state machine that are of interest to an auto-detector:
|
||||
|
||||
START state: This is the state to start with, or a legal byte sequence
|
||||
(i.e. a valid code point) for character has been identified.
|
||||
|
||||
ME state: This indicates that the state machine identified a byte sequence
|
||||
that is specific to the charset it is designed for and that
|
||||
there is no other possible encoding which can contain this byte
|
||||
sequence. This will to lead to an immediate positive answer for
|
||||
the detector.
|
||||
|
||||
ERROR state: This indicates the state machine identified an illegal byte
|
||||
sequence for that encoding. This will lead to an immediate
|
||||
negative answer for this encoding. Detector will exclude this
|
||||
encoding from consideration from here on.
|
||||
"""
|
||||
def __init__(self, sm):
|
||||
self._model = sm
|
||||
self._curr_byte_pos = 0
|
||||
self._curr_char_len = 0
|
||||
self._curr_state = None
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._curr_state = MachineState.START
|
||||
|
||||
def next_state(self, c):
|
||||
# for each byte we get its class
|
||||
# if it is first byte, we also get byte length
|
||||
byte_class = self._model['class_table'][c]
|
||||
if self._curr_state == MachineState.START:
|
||||
self._curr_byte_pos = 0
|
||||
self._curr_char_len = self._model['char_len_table'][byte_class]
|
||||
# from byte's class and state_table, we get its next state
|
||||
curr_state = (self._curr_state * self._model['class_factor']
|
||||
+ byte_class)
|
||||
self._curr_state = self._model['state_table'][curr_state]
|
||||
self._curr_byte_pos += 1
|
||||
return self._curr_state
|
||||
|
||||
def get_current_charlen(self):
|
||||
return self._curr_char_len
|
||||
|
||||
def get_coding_state_machine(self):
|
||||
return self._model['name']
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return self._model['language']
|
||||
36
venv/Lib/site-packages/pip/_vendor/chardet/compat.py
Normal file
36
venv/Lib/site-packages/pip/_vendor/chardet/compat.py
Normal file
@@ -0,0 +1,36 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# Contributor(s):
|
||||
# Dan Blanchard
|
||||
# Ian Cordasco
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
if sys.version_info < (3, 0):
|
||||
PY2 = True
|
||||
PY3 = False
|
||||
string_types = (str, unicode)
|
||||
text_type = unicode
|
||||
iteritems = dict.iteritems
|
||||
else:
|
||||
PY2 = False
|
||||
PY3 = True
|
||||
string_types = (bytes, str)
|
||||
text_type = str
|
||||
iteritems = dict.items
|
||||
49
venv/Lib/site-packages/pip/_vendor/chardet/cp949prober.py
Normal file
49
venv/Lib/site-packages/pip/_vendor/chardet/cp949prober.py
Normal file
@@ -0,0 +1,49 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .chardistribution import EUCKRDistributionAnalysis
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .mbcssm import CP949_SM_MODEL
|
||||
|
||||
|
||||
class CP949Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(CP949Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
|
||||
# NOTE: CP949 is a superset of EUC-KR, so the distribution should be
|
||||
# not different.
|
||||
self.distribution_analyzer = EUCKRDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "CP949"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Korean"
|
||||
76
venv/Lib/site-packages/pip/_vendor/chardet/enums.py
Normal file
76
venv/Lib/site-packages/pip/_vendor/chardet/enums.py
Normal file
@@ -0,0 +1,76 @@
|
||||
"""
|
||||
All of the Enums that are used throughout the chardet package.
|
||||
|
||||
:author: Dan Blanchard (dan.blanchard@gmail.com)
|
||||
"""
|
||||
|
||||
|
||||
class InputState(object):
|
||||
"""
|
||||
This enum represents the different states a universal detector can be in.
|
||||
"""
|
||||
PURE_ASCII = 0
|
||||
ESC_ASCII = 1
|
||||
HIGH_BYTE = 2
|
||||
|
||||
|
||||
class LanguageFilter(object):
|
||||
"""
|
||||
This enum represents the different language filters we can apply to a
|
||||
``UniversalDetector``.
|
||||
"""
|
||||
CHINESE_SIMPLIFIED = 0x01
|
||||
CHINESE_TRADITIONAL = 0x02
|
||||
JAPANESE = 0x04
|
||||
KOREAN = 0x08
|
||||
NON_CJK = 0x10
|
||||
ALL = 0x1F
|
||||
CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
|
||||
CJK = CHINESE | JAPANESE | KOREAN
|
||||
|
||||
|
||||
class ProbingState(object):
|
||||
"""
|
||||
This enum represents the different states a prober can be in.
|
||||
"""
|
||||
DETECTING = 0
|
||||
FOUND_IT = 1
|
||||
NOT_ME = 2
|
||||
|
||||
|
||||
class MachineState(object):
|
||||
"""
|
||||
This enum represents the different states a state machine can be in.
|
||||
"""
|
||||
START = 0
|
||||
ERROR = 1
|
||||
ITS_ME = 2
|
||||
|
||||
|
||||
class SequenceLikelihood(object):
|
||||
"""
|
||||
This enum represents the likelihood of a character following the previous one.
|
||||
"""
|
||||
NEGATIVE = 0
|
||||
UNLIKELY = 1
|
||||
LIKELY = 2
|
||||
POSITIVE = 3
|
||||
|
||||
@classmethod
|
||||
def get_num_categories(cls):
|
||||
""":returns: The number of likelihood categories in the enum."""
|
||||
return 4
|
||||
|
||||
|
||||
class CharacterCategory(object):
|
||||
"""
|
||||
This enum represents the different categories language models for
|
||||
``SingleByteCharsetProber`` put characters into.
|
||||
|
||||
Anything less than CONTROL is considered a letter.
|
||||
"""
|
||||
UNDEFINED = 255
|
||||
LINE_BREAK = 254
|
||||
SYMBOL = 253
|
||||
DIGIT = 252
|
||||
CONTROL = 251
|
||||
101
venv/Lib/site-packages/pip/_vendor/chardet/escprober.py
Normal file
101
venv/Lib/site-packages/pip/_vendor/chardet/escprober.py
Normal file
@@ -0,0 +1,101 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .enums import LanguageFilter, ProbingState, MachineState
|
||||
from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL,
|
||||
ISO2022KR_SM_MODEL)
|
||||
|
||||
|
||||
class EscCharSetProber(CharSetProber):
|
||||
"""
|
||||
This CharSetProber uses a "code scheme" approach for detecting encodings,
|
||||
whereby easily recognizable escape or shift sequences are relied on to
|
||||
identify these encodings.
|
||||
"""
|
||||
|
||||
def __init__(self, lang_filter=None):
|
||||
super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
|
||||
self.coding_sm = []
|
||||
if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
|
||||
self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
|
||||
self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
|
||||
if self.lang_filter & LanguageFilter.JAPANESE:
|
||||
self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
|
||||
if self.lang_filter & LanguageFilter.KOREAN:
|
||||
self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
|
||||
self.active_sm_count = None
|
||||
self._detected_charset = None
|
||||
self._detected_language = None
|
||||
self._state = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
super(EscCharSetProber, self).reset()
|
||||
for coding_sm in self.coding_sm:
|
||||
if not coding_sm:
|
||||
continue
|
||||
coding_sm.active = True
|
||||
coding_sm.reset()
|
||||
self.active_sm_count = len(self.coding_sm)
|
||||
self._detected_charset = None
|
||||
self._detected_language = None
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return self._detected_charset
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return self._detected_language
|
||||
|
||||
def get_confidence(self):
|
||||
if self._detected_charset:
|
||||
return 0.99
|
||||
else:
|
||||
return 0.00
|
||||
|
||||
def feed(self, byte_str):
|
||||
for c in byte_str:
|
||||
for coding_sm in self.coding_sm:
|
||||
if not coding_sm or not coding_sm.active:
|
||||
continue
|
||||
coding_state = coding_sm.next_state(c)
|
||||
if coding_state == MachineState.ERROR:
|
||||
coding_sm.active = False
|
||||
self.active_sm_count -= 1
|
||||
if self.active_sm_count <= 0:
|
||||
self._state = ProbingState.NOT_ME
|
||||
return self.state
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
self._detected_charset = coding_sm.get_coding_state_machine()
|
||||
self._detected_language = coding_sm.language
|
||||
return self.state
|
||||
|
||||
return self.state
|
||||
246
venv/Lib/site-packages/pip/_vendor/chardet/escsm.py
Normal file
246
venv/Lib/site-packages/pip/_vendor/chardet/escsm.py
Normal file
@@ -0,0 +1,246 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import MachineState
|
||||
|
||||
HZ_CLS = (
|
||||
1,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,0,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,0,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,4,0,5,2,0, # 78 - 7f
|
||||
1,1,1,1,1,1,1,1, # 80 - 87
|
||||
1,1,1,1,1,1,1,1, # 88 - 8f
|
||||
1,1,1,1,1,1,1,1, # 90 - 97
|
||||
1,1,1,1,1,1,1,1, # 98 - 9f
|
||||
1,1,1,1,1,1,1,1, # a0 - a7
|
||||
1,1,1,1,1,1,1,1, # a8 - af
|
||||
1,1,1,1,1,1,1,1, # b0 - b7
|
||||
1,1,1,1,1,1,1,1, # b8 - bf
|
||||
1,1,1,1,1,1,1,1, # c0 - c7
|
||||
1,1,1,1,1,1,1,1, # c8 - cf
|
||||
1,1,1,1,1,1,1,1, # d0 - d7
|
||||
1,1,1,1,1,1,1,1, # d8 - df
|
||||
1,1,1,1,1,1,1,1, # e0 - e7
|
||||
1,1,1,1,1,1,1,1, # e8 - ef
|
||||
1,1,1,1,1,1,1,1, # f0 - f7
|
||||
1,1,1,1,1,1,1,1, # f8 - ff
|
||||
)
|
||||
|
||||
HZ_ST = (
|
||||
MachineState.START,MachineState.ERROR, 3,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START, 4,MachineState.ERROR,# 10-17
|
||||
5,MachineState.ERROR, 6,MachineState.ERROR, 5, 5, 4,MachineState.ERROR,# 18-1f
|
||||
4,MachineState.ERROR, 4, 4, 4,MachineState.ERROR, 4,MachineState.ERROR,# 20-27
|
||||
4,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 28-2f
|
||||
)
|
||||
|
||||
HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||
|
||||
HZ_SM_MODEL = {'class_table': HZ_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': HZ_ST,
|
||||
'char_len_table': HZ_CHAR_LEN_TABLE,
|
||||
'name': "HZ-GB-2312",
|
||||
'language': 'Chinese'}
|
||||
|
||||
ISO2022CN_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,3,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,4,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
2,2,2,2,2,2,2,2, # 80 - 87
|
||||
2,2,2,2,2,2,2,2, # 88 - 8f
|
||||
2,2,2,2,2,2,2,2, # 90 - 97
|
||||
2,2,2,2,2,2,2,2, # 98 - 9f
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022CN_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
|
||||
MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 20-27
|
||||
5, 6,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 28-2f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,# 38-3f
|
||||
)
|
||||
|
||||
ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022CN_SM_MODEL = {'class_table': ISO2022CN_CLS,
|
||||
'class_factor': 9,
|
||||
'state_table': ISO2022CN_ST,
|
||||
'char_len_table': ISO2022CN_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-CN",
|
||||
'language': 'Chinese'}
|
||||
|
||||
ISO2022JP_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,2,2, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,7,0,0,0, # 20 - 27
|
||||
3,0,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
6,0,4,0,8,0,0,0, # 40 - 47
|
||||
0,9,5,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
2,2,2,2,2,2,2,2, # 80 - 87
|
||||
2,2,2,2,2,2,2,2, # 88 - 8f
|
||||
2,2,2,2,2,2,2,2, # 90 - 97
|
||||
2,2,2,2,2,2,2,2, # 98 - 9f
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022JP_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
|
||||
MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 20-27
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 6,MachineState.ITS_ME,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,# 28-2f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,# 30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 38-3f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.START,# 40-47
|
||||
)
|
||||
|
||||
ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022JP_SM_MODEL = {'class_table': ISO2022JP_CLS,
|
||||
'class_factor': 10,
|
||||
'state_table': ISO2022JP_ST,
|
||||
'char_len_table': ISO2022JP_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-JP",
|
||||
'language': 'Japanese'}
|
||||
|
||||
ISO2022KR_CLS = (
|
||||
2,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,0,0,0,0,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,1,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,3,0,0,0, # 20 - 27
|
||||
0,4,0,0,0,0,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,5,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
2,2,2,2,2,2,2,2, # 80 - 87
|
||||
2,2,2,2,2,2,2,2, # 88 - 8f
|
||||
2,2,2,2,2,2,2,2, # 90 - 97
|
||||
2,2,2,2,2,2,2,2, # 98 - 9f
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,2, # f8 - ff
|
||||
)
|
||||
|
||||
ISO2022KR_ST = (
|
||||
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 18-1f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 20-27
|
||||
)
|
||||
|
||||
ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
|
||||
|
||||
ISO2022KR_SM_MODEL = {'class_table': ISO2022KR_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': ISO2022KR_ST,
|
||||
'char_len_table': ISO2022KR_CHAR_LEN_TABLE,
|
||||
'name': "ISO-2022-KR",
|
||||
'language': 'Korean'}
|
||||
|
||||
|
||||
92
venv/Lib/site-packages/pip/_vendor/chardet/eucjpprober.py
Normal file
92
venv/Lib/site-packages/pip/_vendor/chardet/eucjpprober.py
Normal file
@@ -0,0 +1,92 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import ProbingState, MachineState
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCJPDistributionAnalysis
|
||||
from .jpcntx import EUCJPContextAnalysis
|
||||
from .mbcssm import EUCJP_SM_MODEL
|
||||
|
||||
|
||||
class EUCJPProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(EUCJPProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
|
||||
self.distribution_analyzer = EUCJPDistributionAnalysis()
|
||||
self.context_analyzer = EUCJPContextAnalysis()
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
super(EUCJPProber, self).reset()
|
||||
self.context_analyzer.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "EUC-JP"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Japanese"
|
||||
|
||||
def feed(self, byte_str):
|
||||
for i in range(len(byte_str)):
|
||||
# PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte
|
||||
coding_state = self.coding_sm.next_state(byte_str[i])
|
||||
if coding_state == MachineState.ERROR:
|
||||
self.logger.debug('%s %s prober hit error at byte %s',
|
||||
self.charset_name, self.language, i)
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif coding_state == MachineState.START:
|
||||
char_len = self.coding_sm.get_current_charlen()
|
||||
if i == 0:
|
||||
self._last_char[1] = byte_str[0]
|
||||
self.context_analyzer.feed(self._last_char, char_len)
|
||||
self.distribution_analyzer.feed(self._last_char, char_len)
|
||||
else:
|
||||
self.context_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
|
||||
self._last_char[0] = byte_str[-1]
|
||||
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if (self.context_analyzer.got_enough_data() and
|
||||
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
context_conf = self.context_analyzer.get_confidence()
|
||||
distrib_conf = self.distribution_analyzer.get_confidence()
|
||||
return max(context_conf, distrib_conf)
|
||||
195
venv/Lib/site-packages/pip/_vendor/chardet/euckrfreq.py
Normal file
195
venv/Lib/site-packages/pip/_vendor/chardet/euckrfreq.py
Normal file
@@ -0,0 +1,195 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# Sampling from about 20M text materials include literature and computer technology
|
||||
|
||||
# 128 --> 0.79
|
||||
# 256 --> 0.92
|
||||
# 512 --> 0.986
|
||||
# 1024 --> 0.99944
|
||||
# 2048 --> 0.99999
|
||||
#
|
||||
# Idea Distribution Ratio = 0.98653 / (1-0.98653) = 73.24
|
||||
# Random Distribution Ration = 512 / (2350-512) = 0.279.
|
||||
#
|
||||
# Typical Distribution Ratio
|
||||
|
||||
EUCKR_TYPICAL_DISTRIBUTION_RATIO = 6.0
|
||||
|
||||
EUCKR_TABLE_SIZE = 2352
|
||||
|
||||
# Char to FreqOrder table ,
|
||||
EUCKR_CHAR_TO_FREQ_ORDER = (
|
||||
13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87,
|
||||
1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398,
|
||||
1399,1729,1730,1731, 141, 621, 326,1057, 368,1732, 267, 488, 20,1733,1269,1734,
|
||||
945,1400,1735, 47, 904,1270,1736,1737, 773, 248,1738, 409, 313, 786, 429,1739,
|
||||
116, 987, 813,1401, 683, 75,1204, 145,1740,1741,1742,1743, 16, 847, 667, 622,
|
||||
708,1744,1745,1746, 966, 787, 304, 129,1747, 60, 820, 123, 676,1748,1749,1750,
|
||||
1751, 617,1752, 626,1753,1754,1755,1756, 653,1757,1758,1759,1760,1761,1762, 856,
|
||||
344,1763,1764,1765,1766, 89, 401, 418, 806, 905, 848,1767,1768,1769, 946,1205,
|
||||
709,1770,1118,1771, 241,1772,1773,1774,1271,1775, 569,1776, 999,1777,1778,1779,
|
||||
1780, 337, 751,1058, 28, 628, 254,1781, 177, 906, 270, 349, 891,1079,1782, 19,
|
||||
1783, 379,1784, 315,1785, 629, 754,1402, 559,1786, 636, 203,1206,1787, 710, 567,
|
||||
1788, 935, 814,1789,1790,1207, 766, 528,1791,1792,1208,1793,1794,1795,1796,1797,
|
||||
1403,1798,1799, 533,1059,1404,1405,1156,1406, 936, 884,1080,1800, 351,1801,1802,
|
||||
1803,1804,1805, 801,1806,1807,1808,1119,1809,1157, 714, 474,1407,1810, 298, 899,
|
||||
885,1811,1120, 802,1158,1812, 892,1813,1814,1408, 659,1815,1816,1121,1817,1818,
|
||||
1819,1820,1821,1822, 319,1823, 594, 545,1824, 815, 937,1209,1825,1826, 573,1409,
|
||||
1022,1827,1210,1828,1829,1830,1831,1832,1833, 556, 722, 807,1122,1060,1834, 697,
|
||||
1835, 900, 557, 715,1836,1410, 540,1411, 752,1159, 294, 597,1211, 976, 803, 770,
|
||||
1412,1837,1838, 39, 794,1413, 358,1839, 371, 925,1840, 453, 661, 788, 531, 723,
|
||||
544,1023,1081, 869, 91,1841, 392, 430, 790, 602,1414, 677,1082, 457,1415,1416,
|
||||
1842,1843, 475, 327,1024,1417, 795, 121,1844, 733, 403,1418,1845,1846,1847, 300,
|
||||
119, 711,1212, 627,1848,1272, 207,1849,1850, 796,1213, 382,1851, 519,1852,1083,
|
||||
893,1853,1854,1855, 367, 809, 487, 671,1856, 663,1857,1858, 956, 471, 306, 857,
|
||||
1859,1860,1160,1084,1861,1862,1863,1864,1865,1061,1866,1867,1868,1869,1870,1871,
|
||||
282, 96, 574,1872, 502,1085,1873,1214,1874, 907,1875,1876, 827, 977,1419,1420,
|
||||
1421, 268,1877,1422,1878,1879,1880, 308,1881, 2, 537,1882,1883,1215,1884,1885,
|
||||
127, 791,1886,1273,1423,1887, 34, 336, 404, 643,1888, 571, 654, 894, 840,1889,
|
||||
0, 886,1274, 122, 575, 260, 908, 938,1890,1275, 410, 316,1891,1892, 100,1893,
|
||||
1894,1123, 48,1161,1124,1025,1895, 633, 901,1276,1896,1897, 115, 816,1898, 317,
|
||||
1899, 694,1900, 909, 734,1424, 572, 866,1425, 691, 85, 524,1010, 543, 394, 841,
|
||||
1901,1902,1903,1026,1904,1905,1906,1907,1908,1909, 30, 451, 651, 988, 310,1910,
|
||||
1911,1426, 810,1216, 93,1912,1913,1277,1217,1914, 858, 759, 45, 58, 181, 610,
|
||||
269,1915,1916, 131,1062, 551, 443,1000, 821,1427, 957, 895,1086,1917,1918, 375,
|
||||
1919, 359,1920, 687,1921, 822,1922, 293,1923,1924, 40, 662, 118, 692, 29, 939,
|
||||
887, 640, 482, 174,1925, 69,1162, 728,1428, 910,1926,1278,1218,1279, 386, 870,
|
||||
217, 854,1163, 823,1927,1928,1929,1930, 834,1931, 78,1932, 859,1933,1063,1934,
|
||||
1935,1936,1937, 438,1164, 208, 595,1938,1939,1940,1941,1219,1125,1942, 280, 888,
|
||||
1429,1430,1220,1431,1943,1944,1945,1946,1947,1280, 150, 510,1432,1948,1949,1950,
|
||||
1951,1952,1953,1954,1011,1087,1955,1433,1043,1956, 881,1957, 614, 958,1064,1065,
|
||||
1221,1958, 638,1001, 860, 967, 896,1434, 989, 492, 553,1281,1165,1959,1282,1002,
|
||||
1283,1222,1960,1961,1962,1963, 36, 383, 228, 753, 247, 454,1964, 876, 678,1965,
|
||||
1966,1284, 126, 464, 490, 835, 136, 672, 529, 940,1088,1435, 473,1967,1968, 467,
|
||||
50, 390, 227, 587, 279, 378, 598, 792, 968, 240, 151, 160, 849, 882,1126,1285,
|
||||
639,1044, 133, 140, 288, 360, 811, 563,1027, 561, 142, 523,1969,1970,1971, 7,
|
||||
103, 296, 439, 407, 506, 634, 990,1972,1973,1974,1975, 645,1976,1977,1978,1979,
|
||||
1980,1981, 236,1982,1436,1983,1984,1089, 192, 828, 618, 518,1166, 333,1127,1985,
|
||||
818,1223,1986,1987,1988,1989,1990,1991,1992,1993, 342,1128,1286, 746, 842,1994,
|
||||
1995, 560, 223,1287, 98, 8, 189, 650, 978,1288,1996,1437,1997, 17, 345, 250,
|
||||
423, 277, 234, 512, 226, 97, 289, 42, 167,1998, 201,1999,2000, 843, 836, 824,
|
||||
532, 338, 783,1090, 182, 576, 436,1438,1439, 527, 500,2001, 947, 889,2002,2003,
|
||||
2004,2005, 262, 600, 314, 447,2006, 547,2007, 693, 738,1129,2008, 71,1440, 745,
|
||||
619, 688,2009, 829,2010,2011, 147,2012, 33, 948,2013,2014, 74, 224,2015, 61,
|
||||
191, 918, 399, 637,2016,1028,1130, 257, 902,2017,2018,2019,2020,2021,2022,2023,
|
||||
2024,2025,2026, 837,2027,2028,2029,2030, 179, 874, 591, 52, 724, 246,2031,2032,
|
||||
2033,2034,1167, 969,2035,1289, 630, 605, 911,1091,1168,2036,2037,2038,1441, 912,
|
||||
2039, 623,2040,2041, 253,1169,1290,2042,1442, 146, 620, 611, 577, 433,2043,1224,
|
||||
719,1170, 959, 440, 437, 534, 84, 388, 480,1131, 159, 220, 198, 679,2044,1012,
|
||||
819,1066,1443, 113,1225, 194, 318,1003,1029,2045,2046,2047,2048,1067,2049,2050,
|
||||
2051,2052,2053, 59, 913, 112,2054, 632,2055, 455, 144, 739,1291,2056, 273, 681,
|
||||
499,2057, 448,2058,2059, 760,2060,2061, 970, 384, 169, 245,1132,2062,2063, 414,
|
||||
1444,2064,2065, 41, 235,2066, 157, 252, 877, 568, 919, 789, 580,2067, 725,2068,
|
||||
2069,1292,2070,2071,1445,2072,1446,2073,2074, 55, 588, 66,1447, 271,1092,2075,
|
||||
1226,2076, 960,1013, 372,2077,2078,2079,2080,2081,1293,2082,2083,2084,2085, 850,
|
||||
2086,2087,2088,2089,2090, 186,2091,1068, 180,2092,2093,2094, 109,1227, 522, 606,
|
||||
2095, 867,1448,1093, 991,1171, 926, 353,1133,2096, 581,2097,2098,2099,1294,1449,
|
||||
1450,2100, 596,1172,1014,1228,2101,1451,1295,1173,1229,2102,2103,1296,1134,1452,
|
||||
949,1135,2104,2105,1094,1453,1454,1455,2106,1095,2107,2108,2109,2110,2111,2112,
|
||||
2113,2114,2115,2116,2117, 804,2118,2119,1230,1231, 805,1456, 405,1136,2120,2121,
|
||||
2122,2123,2124, 720, 701,1297, 992,1457, 927,1004,2125,2126,2127,2128,2129,2130,
|
||||
22, 417,2131, 303,2132, 385,2133, 971, 520, 513,2134,1174, 73,1096, 231, 274,
|
||||
962,1458, 673,2135,1459,2136, 152,1137,2137,2138,2139,2140,1005,1138,1460,1139,
|
||||
2141,2142,2143,2144, 11, 374, 844,2145, 154,1232, 46,1461,2146, 838, 830, 721,
|
||||
1233, 106,2147, 90, 428, 462, 578, 566,1175, 352,2148,2149, 538,1234, 124,1298,
|
||||
2150,1462, 761, 565,2151, 686,2152, 649,2153, 72, 173,2154, 460, 415,2155,1463,
|
||||
2156,1235, 305,2157,2158,2159,2160,2161,2162, 579,2163,2164,2165,2166,2167, 747,
|
||||
2168,2169,2170,2171,1464, 669,2172,2173,2174,2175,2176,1465,2177, 23, 530, 285,
|
||||
2178, 335, 729,2179, 397,2180,2181,2182,1030,2183,2184, 698,2185,2186, 325,2187,
|
||||
2188, 369,2189, 799,1097,1015, 348,2190,1069, 680,2191, 851,1466,2192,2193, 10,
|
||||
2194, 613, 424,2195, 979, 108, 449, 589, 27, 172, 81,1031, 80, 774, 281, 350,
|
||||
1032, 525, 301, 582,1176,2196, 674,1045,2197,2198,1467, 730, 762,2199,2200,2201,
|
||||
2202,1468,2203, 993,2204,2205, 266,1070, 963,1140,2206,2207,2208, 664,1098, 972,
|
||||
2209,2210,2211,1177,1469,1470, 871,2212,2213,2214,2215,2216,1471,2217,2218,2219,
|
||||
2220,2221,2222,2223,2224,2225,2226,2227,1472,1236,2228,2229,2230,2231,2232,2233,
|
||||
2234,2235,1299,2236,2237, 200,2238, 477, 373,2239,2240, 731, 825, 777,2241,2242,
|
||||
2243, 521, 486, 548,2244,2245,2246,1473,1300, 53, 549, 137, 875, 76, 158,2247,
|
||||
1301,1474, 469, 396,1016, 278, 712,2248, 321, 442, 503, 767, 744, 941,1237,1178,
|
||||
1475,2249, 82, 178,1141,1179, 973,2250,1302,2251, 297,2252,2253, 570,2254,2255,
|
||||
2256, 18, 450, 206,2257, 290, 292,1142,2258, 511, 162, 99, 346, 164, 735,2259,
|
||||
1476,1477, 4, 554, 343, 798,1099,2260,1100,2261, 43, 171,1303, 139, 215,2262,
|
||||
2263, 717, 775,2264,1033, 322, 216,2265, 831,2266, 149,2267,1304,2268,2269, 702,
|
||||
1238, 135, 845, 347, 309,2270, 484,2271, 878, 655, 238,1006,1478,2272, 67,2273,
|
||||
295,2274,2275, 461,2276, 478, 942, 412,2277,1034,2278,2279,2280, 265,2281, 541,
|
||||
2282,2283,2284,2285,2286, 70, 852,1071,2287,2288,2289,2290, 21, 56, 509, 117,
|
||||
432,2291,2292, 331, 980, 552,1101, 148, 284, 105, 393,1180,1239, 755,2293, 187,
|
||||
2294,1046,1479,2295, 340,2296, 63,1047, 230,2297,2298,1305, 763,1306, 101, 800,
|
||||
808, 494,2299,2300,2301, 903,2302, 37,1072, 14, 5,2303, 79, 675,2304, 312,
|
||||
2305,2306,2307,2308,2309,1480, 6,1307,2310,2311,2312, 1, 470, 35, 24, 229,
|
||||
2313, 695, 210, 86, 778, 15, 784, 592, 779, 32, 77, 855, 964,2314, 259,2315,
|
||||
501, 380,2316,2317, 83, 981, 153, 689,1308,1481,1482,1483,2318,2319, 716,1484,
|
||||
2320,2321,2322,2323,2324,2325,1485,2326,2327, 128, 57, 68, 261,1048, 211, 170,
|
||||
1240, 31,2328, 51, 435, 742,2329,2330,2331, 635,2332, 264, 456,2333,2334,2335,
|
||||
425,2336,1486, 143, 507, 263, 943,2337, 363, 920,1487, 256,1488,1102, 243, 601,
|
||||
1489,2338,2339,2340,2341,2342,2343,2344, 861,2345,2346,2347,2348,2349,2350, 395,
|
||||
2351,1490,1491, 62, 535, 166, 225,2352,2353, 668, 419,1241, 138, 604, 928,2354,
|
||||
1181,2355,1492,1493,2356,2357,2358,1143,2359, 696,2360, 387, 307,1309, 682, 476,
|
||||
2361,2362, 332, 12, 222, 156,2363, 232,2364, 641, 276, 656, 517,1494,1495,1035,
|
||||
416, 736,1496,2365,1017, 586,2366,2367,2368,1497,2369, 242,2370,2371,2372,1498,
|
||||
2373, 965, 713,2374,2375,2376,2377, 740, 982,1499, 944,1500,1007,2378,2379,1310,
|
||||
1501,2380,2381,2382, 785, 329,2383,2384,1502,2385,2386,2387, 932,2388,1503,2389,
|
||||
2390,2391,2392,1242,2393,2394,2395,2396,2397, 994, 950,2398,2399,2400,2401,1504,
|
||||
1311,2402,2403,2404,2405,1049, 749,2406,2407, 853, 718,1144,1312,2408,1182,1505,
|
||||
2409,2410, 255, 516, 479, 564, 550, 214,1506,1507,1313, 413, 239, 444, 339,1145,
|
||||
1036,1508,1509,1314,1037,1510,1315,2411,1511,2412,2413,2414, 176, 703, 497, 624,
|
||||
593, 921, 302,2415, 341, 165,1103,1512,2416,1513,2417,2418,2419, 376,2420, 700,
|
||||
2421,2422,2423, 258, 768,1316,2424,1183,2425, 995, 608,2426,2427,2428,2429, 221,
|
||||
2430,2431,2432,2433,2434,2435,2436,2437, 195, 323, 726, 188, 897, 983,1317, 377,
|
||||
644,1050, 879,2438, 452,2439,2440,2441,2442,2443,2444, 914,2445,2446,2447,2448,
|
||||
915, 489,2449,1514,1184,2450,2451, 515, 64, 427, 495,2452, 583,2453, 483, 485,
|
||||
1038, 562, 213,1515, 748, 666,2454,2455,2456,2457, 334,2458, 780, 996,1008, 705,
|
||||
1243,2459,2460,2461,2462,2463, 114,2464, 493,1146, 366, 163,1516, 961,1104,2465,
|
||||
291,2466,1318,1105,2467,1517, 365,2468, 355, 951,1244,2469,1319,2470, 631,2471,
|
||||
2472, 218,1320, 364, 320, 756,1518,1519,1321,1520,1322,2473,2474,2475,2476, 997,
|
||||
2477,2478,2479,2480, 665,1185,2481, 916,1521,2482,2483,2484, 584, 684,2485,2486,
|
||||
797,2487,1051,1186,2488,2489,2490,1522,2491,2492, 370,2493,1039,1187, 65,2494,
|
||||
434, 205, 463,1188,2495, 125, 812, 391, 402, 826, 699, 286, 398, 155, 781, 771,
|
||||
585,2496, 590, 505,1073,2497, 599, 244, 219, 917,1018, 952, 646,1523,2498,1323,
|
||||
2499,2500, 49, 984, 354, 741,2501, 625,2502,1324,2503,1019, 190, 357, 757, 491,
|
||||
95, 782, 868,2504,2505,2506,2507,2508,2509, 134,1524,1074, 422,1525, 898,2510,
|
||||
161,2511,2512,2513,2514, 769,2515,1526,2516,2517, 411,1325,2518, 472,1527,2519,
|
||||
2520,2521,2522,2523,2524, 985,2525,2526,2527,2528,2529,2530, 764,2531,1245,2532,
|
||||
2533, 25, 204, 311,2534, 496,2535,1052,2536,2537,2538,2539,2540,2541,2542, 199,
|
||||
704, 504, 468, 758, 657,1528, 196, 44, 839,1246, 272, 750,2543, 765, 862,2544,
|
||||
2545,1326,2546, 132, 615, 933,2547, 732,2548,2549,2550,1189,1529,2551, 283,1247,
|
||||
1053, 607, 929,2552,2553,2554, 930, 183, 872, 616,1040,1147,2555,1148,1020, 441,
|
||||
249,1075,2556,2557,2558, 466, 743,2559,2560,2561, 92, 514, 426, 420, 526,2562,
|
||||
2563,2564,2565,2566,2567,2568, 185,2569,2570,2571,2572, 776,1530, 658,2573, 362,
|
||||
2574, 361, 922,1076, 793,2575,2576,2577,2578,2579,2580,1531, 251,2581,2582,2583,
|
||||
2584,1532, 54, 612, 237,1327,2585,2586, 275, 408, 647, 111,2587,1533,1106, 465,
|
||||
3, 458, 9, 38,2588, 107, 110, 890, 209, 26, 737, 498,2589,1534,2590, 431,
|
||||
202, 88,1535, 356, 287,1107, 660,1149,2591, 381,1536, 986,1150, 445,1248,1151,
|
||||
974,2592,2593, 846,2594, 446, 953, 184,1249,1250, 727,2595, 923, 193, 883,2596,
|
||||
2597,2598, 102, 324, 539, 817,2599, 421,1041,2600, 832,2601, 94, 175, 197, 406,
|
||||
2602, 459,2603,2604,2605,2606,2607, 330, 555,2608,2609,2610, 706,1108, 389,2611,
|
||||
2612,2613,2614, 233,2615, 833, 558, 931, 954,1251,2616,2617,1537, 546,2618,2619,
|
||||
1009,2620,2621,2622,1538, 690,1328,2623, 955,2624,1539,2625,2626, 772,2627,2628,
|
||||
2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042,
|
||||
670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256
|
||||
)
|
||||
|
||||
47
venv/Lib/site-packages/pip/_vendor/chardet/euckrprober.py
Normal file
47
venv/Lib/site-packages/pip/_vendor/chardet/euckrprober.py
Normal file
@@ -0,0 +1,47 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCKRDistributionAnalysis
|
||||
from .mbcssm import EUCKR_SM_MODEL
|
||||
|
||||
|
||||
class EUCKRProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(EUCKRProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
|
||||
self.distribution_analyzer = EUCKRDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "EUC-KR"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Korean"
|
||||
387
venv/Lib/site-packages/pip/_vendor/chardet/euctwfreq.py
Normal file
387
venv/Lib/site-packages/pip/_vendor/chardet/euctwfreq.py
Normal file
@@ -0,0 +1,387 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# EUCTW frequency table
|
||||
# Converted from big5 work
|
||||
# by Taiwan's Mandarin Promotion Council
|
||||
# <http:#www.edu.tw:81/mandr/>
|
||||
|
||||
# 128 --> 0.42261
|
||||
# 256 --> 0.57851
|
||||
# 512 --> 0.74851
|
||||
# 1024 --> 0.89384
|
||||
# 2048 --> 0.97583
|
||||
#
|
||||
# Idea Distribution Ratio = 0.74851/(1-0.74851) =2.98
|
||||
# Random Distribution Ration = 512/(5401-512)=0.105
|
||||
#
|
||||
# Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
|
||||
|
||||
EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75
|
||||
|
||||
# Char to FreqOrder table ,
|
||||
EUCTW_TABLE_SIZE = 5376
|
||||
|
||||
EUCTW_CHAR_TO_FREQ_ORDER = (
|
||||
1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742
|
||||
3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758
|
||||
1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774
|
||||
63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790
|
||||
3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806
|
||||
4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822
|
||||
7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838
|
||||
630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854
|
||||
179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870
|
||||
995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886
|
||||
2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902
|
||||
1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918
|
||||
3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934
|
||||
706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950
|
||||
1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966
|
||||
3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982
|
||||
2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998
|
||||
437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014
|
||||
3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030
|
||||
1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046
|
||||
7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062
|
||||
266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078
|
||||
7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094
|
||||
1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110
|
||||
32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126
|
||||
188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142
|
||||
3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158
|
||||
3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174
|
||||
324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190
|
||||
2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206
|
||||
2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222
|
||||
314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238
|
||||
287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254
|
||||
3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270
|
||||
1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286
|
||||
1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302
|
||||
1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318
|
||||
2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334
|
||||
265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350
|
||||
4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366
|
||||
1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382
|
||||
7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398
|
||||
2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414
|
||||
383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430
|
||||
98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446
|
||||
523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462
|
||||
710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478
|
||||
7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494
|
||||
379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510
|
||||
1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526
|
||||
585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542
|
||||
690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558
|
||||
7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574
|
||||
1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590
|
||||
544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606
|
||||
3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622
|
||||
4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638
|
||||
3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654
|
||||
279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670
|
||||
610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686
|
||||
1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702
|
||||
4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718
|
||||
3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734
|
||||
3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750
|
||||
2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766
|
||||
7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782
|
||||
3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798
|
||||
7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814
|
||||
1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830
|
||||
2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846
|
||||
1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862
|
||||
78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878
|
||||
1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894
|
||||
4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910
|
||||
3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926
|
||||
534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942
|
||||
165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958
|
||||
626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974
|
||||
2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990
|
||||
7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006
|
||||
1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022
|
||||
2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038
|
||||
1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054
|
||||
1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070
|
||||
7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086
|
||||
7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102
|
||||
7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118
|
||||
3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134
|
||||
4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150
|
||||
1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166
|
||||
7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182
|
||||
2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198
|
||||
7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214
|
||||
3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230
|
||||
3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246
|
||||
7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262
|
||||
2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278
|
||||
7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294
|
||||
862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310
|
||||
4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326
|
||||
2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342
|
||||
7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358
|
||||
3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374
|
||||
2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390
|
||||
2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406
|
||||
294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422
|
||||
2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438
|
||||
1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454
|
||||
1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470
|
||||
2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486
|
||||
1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502
|
||||
7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518
|
||||
7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534
|
||||
2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550
|
||||
4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566
|
||||
1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582
|
||||
7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598
|
||||
829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614
|
||||
4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630
|
||||
375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646
|
||||
2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662
|
||||
444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678
|
||||
1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694
|
||||
1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710
|
||||
730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726
|
||||
3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742
|
||||
3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758
|
||||
1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774
|
||||
3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790
|
||||
7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806
|
||||
7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822
|
||||
1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838
|
||||
2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854
|
||||
1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870
|
||||
3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886
|
||||
2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902
|
||||
3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918
|
||||
2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934
|
||||
4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950
|
||||
4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966
|
||||
3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982
|
||||
97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998
|
||||
3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014
|
||||
424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030
|
||||
3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046
|
||||
3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062
|
||||
3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078
|
||||
1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094
|
||||
7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110
|
||||
199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126
|
||||
7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142
|
||||
1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158
|
||||
391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174
|
||||
4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190
|
||||
3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206
|
||||
397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222
|
||||
2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238
|
||||
2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254
|
||||
3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270
|
||||
1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286
|
||||
4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302
|
||||
2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318
|
||||
1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334
|
||||
1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350
|
||||
2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366
|
||||
3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382
|
||||
1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398
|
||||
7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414
|
||||
1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430
|
||||
4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446
|
||||
1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462
|
||||
135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478
|
||||
1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494
|
||||
3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510
|
||||
3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526
|
||||
2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542
|
||||
1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558
|
||||
4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574
|
||||
660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590
|
||||
7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606
|
||||
2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622
|
||||
3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638
|
||||
4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654
|
||||
790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670
|
||||
7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686
|
||||
7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702
|
||||
1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718
|
||||
4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734
|
||||
3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750
|
||||
2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766
|
||||
3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782
|
||||
3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798
|
||||
2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814
|
||||
1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830
|
||||
4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846
|
||||
3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862
|
||||
3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878
|
||||
2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894
|
||||
4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910
|
||||
7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926
|
||||
3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942
|
||||
2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958
|
||||
3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974
|
||||
1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990
|
||||
2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006
|
||||
3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022
|
||||
4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038
|
||||
2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054
|
||||
2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070
|
||||
7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086
|
||||
1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102
|
||||
2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118
|
||||
1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134
|
||||
3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150
|
||||
4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166
|
||||
2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182
|
||||
3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198
|
||||
3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214
|
||||
2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230
|
||||
4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246
|
||||
2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262
|
||||
3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278
|
||||
4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294
|
||||
7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310
|
||||
3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326
|
||||
194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342
|
||||
1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358
|
||||
4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374
|
||||
1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390
|
||||
4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406
|
||||
7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422
|
||||
510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438
|
||||
7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454
|
||||
2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470
|
||||
1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486
|
||||
1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502
|
||||
3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518
|
||||
509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534
|
||||
552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550
|
||||
478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566
|
||||
3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582
|
||||
2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598
|
||||
751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614
|
||||
7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630
|
||||
1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646
|
||||
3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662
|
||||
7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678
|
||||
1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694
|
||||
7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710
|
||||
4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726
|
||||
1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742
|
||||
2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758
|
||||
2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774
|
||||
4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790
|
||||
802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806
|
||||
809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822
|
||||
3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838
|
||||
3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854
|
||||
1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870
|
||||
2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886
|
||||
7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902
|
||||
1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918
|
||||
1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934
|
||||
3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950
|
||||
919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966
|
||||
1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982
|
||||
4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998
|
||||
7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014
|
||||
2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030
|
||||
3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046
|
||||
516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062
|
||||
1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078
|
||||
2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094
|
||||
2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110
|
||||
7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126
|
||||
7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142
|
||||
7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158
|
||||
2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174
|
||||
2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190
|
||||
1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206
|
||||
4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222
|
||||
3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238
|
||||
3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254
|
||||
4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270
|
||||
4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286
|
||||
2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302
|
||||
2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318
|
||||
7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334
|
||||
4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350
|
||||
7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366
|
||||
2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382
|
||||
1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398
|
||||
3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414
|
||||
4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430
|
||||
2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446
|
||||
120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462
|
||||
2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478
|
||||
1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494
|
||||
2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510
|
||||
2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526
|
||||
4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542
|
||||
7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558
|
||||
1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574
|
||||
3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590
|
||||
7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606
|
||||
1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622
|
||||
8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638
|
||||
2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654
|
||||
8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670
|
||||
2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686
|
||||
2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702
|
||||
8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718
|
||||
8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734
|
||||
8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750
|
||||
408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766
|
||||
8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782
|
||||
4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798
|
||||
3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814
|
||||
8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830
|
||||
1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846
|
||||
8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862
|
||||
425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878
|
||||
1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894
|
||||
479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910
|
||||
4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926
|
||||
1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942
|
||||
4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958
|
||||
1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974
|
||||
433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990
|
||||
3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006
|
||||
4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022
|
||||
8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038
|
||||
938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054
|
||||
3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070
|
||||
890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086
|
||||
2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102
|
||||
)
|
||||
|
||||
46
venv/Lib/site-packages/pip/_vendor/chardet/euctwprober.py
Normal file
46
venv/Lib/site-packages/pip/_vendor/chardet/euctwprober.py
Normal file
@@ -0,0 +1,46 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import EUCTWDistributionAnalysis
|
||||
from .mbcssm import EUCTW_SM_MODEL
|
||||
|
||||
class EUCTWProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(EUCTWProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
|
||||
self.distribution_analyzer = EUCTWDistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "EUC-TW"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Taiwan"
|
||||
283
venv/Lib/site-packages/pip/_vendor/chardet/gb2312freq.py
Normal file
283
venv/Lib/site-packages/pip/_vendor/chardet/gb2312freq.py
Normal file
@@ -0,0 +1,283 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# GB2312 most frequently used character table
|
||||
#
|
||||
# Char to FreqOrder table , from hz6763
|
||||
|
||||
# 512 --> 0.79 -- 0.79
|
||||
# 1024 --> 0.92 -- 0.13
|
||||
# 2048 --> 0.98 -- 0.06
|
||||
# 6768 --> 1.00 -- 0.02
|
||||
#
|
||||
# Ideal Distribution Ratio = 0.79135/(1-0.79135) = 3.79
|
||||
# Random Distribution Ration = 512 / (3755 - 512) = 0.157
|
||||
#
|
||||
# Typical Distribution Ratio about 25% of Ideal one, still much higher that RDR
|
||||
|
||||
GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9
|
||||
|
||||
GB2312_TABLE_SIZE = 3760
|
||||
|
||||
GB2312_CHAR_TO_FREQ_ORDER = (
|
||||
1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
|
||||
2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
|
||||
2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
|
||||
249,4088,1746,1873,2047,1774, 581,1813, 358,1174,3590,1014,1561,4844,2245, 670,
|
||||
1636,3112, 889,1286, 953, 556,2327,3060,1290,3141, 613, 185,3477,1367, 850,3820,
|
||||
1715,2428,2642,2303,2732,3041,2562,2648,3566,3946,1349, 388,3098,2091,1360,3585,
|
||||
152,1687,1539, 738,1559, 59,1232,2925,2267,1388,1249,1741,1679,2960, 151,1566,
|
||||
1125,1352,4271, 924,4296, 385,3166,4459, 310,1245,2850, 70,3285,2729,3534,3575,
|
||||
2398,3298,3466,1960,2265, 217,3647, 864,1909,2084,4401,2773,1010,3269,5152, 853,
|
||||
3051,3121,1244,4251,1895, 364,1499,1540,2313,1180,3655,2268, 562, 715,2417,3061,
|
||||
544, 336,3768,2380,1752,4075, 950, 280,2425,4382, 183,2759,3272, 333,4297,2155,
|
||||
1688,2356,1444,1039,4540, 736,1177,3349,2443,2368,2144,2225, 565, 196,1482,3406,
|
||||
927,1335,4147, 692, 878,1311,1653,3911,3622,1378,4200,1840,2969,3149,2126,1816,
|
||||
2534,1546,2393,2760, 737,2494, 13, 447, 245,2747, 38,2765,2129,2589,1079, 606,
|
||||
360, 471,3755,2890, 404, 848, 699,1785,1236, 370,2221,1023,3746,2074,2026,2023,
|
||||
2388,1581,2119, 812,1141,3091,2536,1519, 804,2053, 406,1596,1090, 784, 548,4414,
|
||||
1806,2264,2936,1100, 343,4114,5096, 622,3358, 743,3668,1510,1626,5020,3567,2513,
|
||||
3195,4115,5627,2489,2991, 24,2065,2697,1087,2719, 48,1634, 315, 68, 985,2052,
|
||||
198,2239,1347,1107,1439, 597,2366,2172, 871,3307, 919,2487,2790,1867, 236,2570,
|
||||
1413,3794, 906,3365,3381,1701,1982,1818,1524,2924,1205, 616,2586,2072,2004, 575,
|
||||
253,3099, 32,1365,1182, 197,1714,2454,1201, 554,3388,3224,2748, 756,2587, 250,
|
||||
2567,1507,1517,3529,1922,2761,2337,3416,1961,1677,2452,2238,3153, 615, 911,1506,
|
||||
1474,2495,1265,1906,2749,3756,3280,2161, 898,2714,1759,3450,2243,2444, 563, 26,
|
||||
3286,2266,3769,3344,2707,3677, 611,1402, 531,1028,2871,4548,1375, 261,2948, 835,
|
||||
1190,4134, 353, 840,2684,1900,3082,1435,2109,1207,1674, 329,1872,2781,4055,2686,
|
||||
2104, 608,3318,2423,2957,2768,1108,3739,3512,3271,3985,2203,1771,3520,1418,2054,
|
||||
1681,1153, 225,1627,2929, 162,2050,2511,3687,1954, 124,1859,2431,1684,3032,2894,
|
||||
585,4805,3969,2869,2704,2088,2032,2095,3656,2635,4362,2209, 256, 518,2042,2105,
|
||||
3777,3657, 643,2298,1148,1779, 190, 989,3544, 414, 11,2135,2063,2979,1471, 403,
|
||||
3678, 126, 770,1563, 671,2499,3216,2877, 600,1179, 307,2805,4937,1268,1297,2694,
|
||||
252,4032,1448,1494,1331,1394, 127,2256, 222,1647,1035,1481,3056,1915,1048, 873,
|
||||
3651, 210, 33,1608,2516, 200,1520, 415, 102, 0,3389,1287, 817, 91,3299,2940,
|
||||
836,1814, 549,2197,1396,1669,2987,3582,2297,2848,4528,1070, 687, 20,1819, 121,
|
||||
1552,1364,1461,1968,2617,3540,2824,2083, 177, 948,4938,2291, 110,4549,2066, 648,
|
||||
3359,1755,2110,2114,4642,4845,1693,3937,3308,1257,1869,2123, 208,1804,3159,2992,
|
||||
2531,2549,3361,2418,1350,2347,2800,2568,1291,2036,2680, 72, 842,1990, 212,1233,
|
||||
1154,1586, 75,2027,3410,4900,1823,1337,2710,2676, 728,2810,1522,3026,4995, 157,
|
||||
755,1050,4022, 710, 785,1936,2194,2085,1406,2777,2400, 150,1250,4049,1206, 807,
|
||||
1910, 534, 529,3309,1721,1660, 274, 39,2827, 661,2670,1578, 925,3248,3815,1094,
|
||||
4278,4901,4252, 41,1150,3747,2572,2227,4501,3658,4902,3813,3357,3617,2884,2258,
|
||||
887, 538,4187,3199,1294,2439,3042,2329,2343,2497,1255, 107, 543,1527, 521,3478,
|
||||
3568, 194,5062, 15, 961,3870,1241,1192,2664, 66,5215,3260,2111,1295,1127,2152,
|
||||
3805,4135, 901,1164,1976, 398,1278, 530,1460, 748, 904,1054,1966,1426, 53,2909,
|
||||
509, 523,2279,1534, 536,1019, 239,1685, 460,2353, 673,1065,2401,3600,4298,2272,
|
||||
1272,2363, 284,1753,3679,4064,1695, 81, 815,2677,2757,2731,1386, 859, 500,4221,
|
||||
2190,2566, 757,1006,2519,2068,1166,1455, 337,2654,3203,1863,1682,1914,3025,1252,
|
||||
1409,1366, 847, 714,2834,2038,3209, 964,2970,1901, 885,2553,1078,1756,3049, 301,
|
||||
1572,3326, 688,2130,1996,2429,1805,1648,2930,3421,2750,3652,3088, 262,1158,1254,
|
||||
389,1641,1812, 526,1719, 923,2073,1073,1902, 468, 489,4625,1140, 857,2375,3070,
|
||||
3319,2863, 380, 116,1328,2693,1161,2244, 273,1212,1884,2769,3011,1775,1142, 461,
|
||||
3066,1200,2147,2212, 790, 702,2695,4222,1601,1058, 434,2338,5153,3640, 67,2360,
|
||||
4099,2502, 618,3472,1329, 416,1132, 830,2782,1807,2653,3211,3510,1662, 192,2124,
|
||||
296,3979,1739,1611,3684, 23, 118, 324, 446,1239,1225, 293,2520,3814,3795,2535,
|
||||
3116, 17,1074, 467,2692,2201, 387,2922, 45,1326,3055,1645,3659,2817, 958, 243,
|
||||
1903,2320,1339,2825,1784,3289, 356, 576, 865,2315,2381,3377,3916,1088,3122,1713,
|
||||
1655, 935, 628,4689,1034,1327, 441, 800, 720, 894,1979,2183,1528,5289,2702,1071,
|
||||
4046,3572,2399,1571,3281, 79, 761,1103, 327, 134, 758,1899,1371,1615, 879, 442,
|
||||
215,2605,2579, 173,2048,2485,1057,2975,3317,1097,2253,3801,4263,1403,1650,2946,
|
||||
814,4968,3487,1548,2644,1567,1285, 2, 295,2636, 97, 946,3576, 832, 141,4257,
|
||||
3273, 760,3821,3521,3156,2607, 949,1024,1733,1516,1803,1920,2125,2283,2665,3180,
|
||||
1501,2064,3560,2171,1592, 803,3518,1416, 732,3897,4258,1363,1362,2458, 119,1427,
|
||||
602,1525,2608,1605,1639,3175, 694,3064, 10, 465, 76,2000,4846,4208, 444,3781,
|
||||
1619,3353,2206,1273,3796, 740,2483, 320,1723,2377,3660,2619,1359,1137,1762,1724,
|
||||
2345,2842,1850,1862, 912, 821,1866, 612,2625,1735,2573,3369,1093, 844, 89, 937,
|
||||
930,1424,3564,2413,2972,1004,3046,3019,2011, 711,3171,1452,4178, 428, 801,1943,
|
||||
432, 445,2811, 206,4136,1472, 730, 349, 73, 397,2802,2547, 998,1637,1167, 789,
|
||||
396,3217, 154,1218, 716,1120,1780,2819,4826,1931,3334,3762,2139,1215,2627, 552,
|
||||
3664,3628,3232,1405,2383,3111,1356,2652,3577,3320,3101,1703, 640,1045,1370,1246,
|
||||
4996, 371,1575,2436,1621,2210, 984,4033,1734,2638, 16,4529, 663,2755,3255,1451,
|
||||
3917,2257,1253,1955,2234,1263,2951, 214,1229, 617, 485, 359,1831,1969, 473,2310,
|
||||
750,2058, 165, 80,2864,2419, 361,4344,2416,2479,1134, 796,3726,1266,2943, 860,
|
||||
2715, 938, 390,2734,1313,1384, 248, 202, 877,1064,2854, 522,3907, 279,1602, 297,
|
||||
2357, 395,3740, 137,2075, 944,4089,2584,1267,3802, 62,1533,2285, 178, 176, 780,
|
||||
2440, 201,3707, 590, 478,1560,4354,2117,1075, 30, 74,4643,4004,1635,1441,2745,
|
||||
776,2596, 238,1077,1692,1912,2844, 605, 499,1742,3947, 241,3053, 980,1749, 936,
|
||||
2640,4511,2582, 515,1543,2162,5322,2892,2993, 890,2148,1924, 665,1827,3581,1032,
|
||||
968,3163, 339,1044,1896, 270, 583,1791,1720,4367,1194,3488,3669, 43,2523,1657,
|
||||
163,2167, 290,1209,1622,3378, 550, 634,2508,2510, 695,2634,2384,2512,1476,1414,
|
||||
220,1469,2341,2138,2852,3183,2900,4939,2865,3502,1211,3680, 854,3227,1299,2976,
|
||||
3172, 186,2998,1459, 443,1067,3251,1495, 321,1932,3054, 909, 753,1410,1828, 436,
|
||||
2441,1119,1587,3164,2186,1258, 227, 231,1425,1890,3200,3942, 247, 959, 725,5254,
|
||||
2741, 577,2158,2079, 929, 120, 174, 838,2813, 591,1115, 417,2024, 40,3240,1536,
|
||||
1037, 291,4151,2354, 632,1298,2406,2500,3535,1825,1846,3451, 205,1171, 345,4238,
|
||||
18,1163, 811, 685,2208,1217, 425,1312,1508,1175,4308,2552,1033, 587,1381,3059,
|
||||
2984,3482, 340,1316,4023,3972, 792,3176, 519, 777,4690, 918, 933,4130,2981,3741,
|
||||
90,3360,2911,2200,5184,4550, 609,3079,2030, 272,3379,2736, 363,3881,1130,1447,
|
||||
286, 779, 357,1169,3350,3137,1630,1220,2687,2391, 747,1277,3688,2618,2682,2601,
|
||||
1156,3196,5290,4034,3102,1689,3596,3128, 874, 219,2783, 798, 508,1843,2461, 269,
|
||||
1658,1776,1392,1913,2983,3287,2866,2159,2372, 829,4076, 46,4253,2873,1889,1894,
|
||||
915,1834,1631,2181,2318, 298, 664,2818,3555,2735, 954,3228,3117, 527,3511,2173,
|
||||
681,2712,3033,2247,2346,3467,1652, 155,2164,3382, 113,1994, 450, 899, 494, 994,
|
||||
1237,2958,1875,2336,1926,3727, 545,1577,1550, 633,3473, 204,1305,3072,2410,1956,
|
||||
2471, 707,2134, 841,2195,2196,2663,3843,1026,4940, 990,3252,4997, 368,1092, 437,
|
||||
3212,3258,1933,1829, 675,2977,2893, 412, 943,3723,4644,3294,3283,2230,2373,5154,
|
||||
2389,2241,2661,2323,1404,2524, 593, 787, 677,3008,1275,2059, 438,2709,2609,2240,
|
||||
2269,2246,1446, 36,1568,1373,3892,1574,2301,1456,3962, 693,2276,5216,2035,1143,
|
||||
2720,1919,1797,1811,2763,4137,2597,1830,1699,1488,1198,2090, 424,1694, 312,3634,
|
||||
3390,4179,3335,2252,1214, 561,1059,3243,2295,2561, 975,5155,2321,2751,3772, 472,
|
||||
1537,3282,3398,1047,2077,2348,2878,1323,3340,3076, 690,2906, 51, 369, 170,3541,
|
||||
1060,2187,2688,3670,2541,1083,1683, 928,3918, 459, 109,4427, 599,3744,4286, 143,
|
||||
2101,2730,2490, 82,1588,3036,2121, 281,1860, 477,4035,1238,2812,3020,2716,3312,
|
||||
1530,2188,2055,1317, 843, 636,1808,1173,3495, 649, 181,1002, 147,3641,1159,2414,
|
||||
3750,2289,2795, 813,3123,2610,1136,4368, 5,3391,4541,2174, 420, 429,1728, 754,
|
||||
1228,2115,2219, 347,2223,2733, 735,1518,3003,2355,3134,1764,3948,3329,1888,2424,
|
||||
1001,1234,1972,3321,3363,1672,1021,1450,1584, 226, 765, 655,2526,3404,3244,2302,
|
||||
3665, 731, 594,2184, 319,1576, 621, 658,2656,4299,2099,3864,1279,2071,2598,2739,
|
||||
795,3086,3699,3908,1707,2352,2402,1382,3136,2475,1465,4847,3496,3865,1085,3004,
|
||||
2591,1084, 213,2287,1963,3565,2250, 822, 793,4574,3187,1772,1789,3050, 595,1484,
|
||||
1959,2770,1080,2650, 456, 422,2996, 940,3322,4328,4345,3092,2742, 965,2784, 739,
|
||||
4124, 952,1358,2498,2949,2565, 332,2698,2378, 660,2260,2473,4194,3856,2919, 535,
|
||||
1260,2651,1208,1428,1300,1949,1303,2942, 433,2455,2450,1251,1946, 614,1269, 641,
|
||||
1306,1810,2737,3078,2912, 564,2365,1419,1415,1497,4460,2367,2185,1379,3005,1307,
|
||||
3218,2175,1897,3063, 682,1157,4040,4005,1712,1160,1941,1399, 394, 402,2952,1573,
|
||||
1151,2986,2404, 862, 299,2033,1489,3006, 346, 171,2886,3401,1726,2932, 168,2533,
|
||||
47,2507,1030,3735,1145,3370,1395,1318,1579,3609,4560,2857,4116,1457,2529,1965,
|
||||
504,1036,2690,2988,2405, 745,5871, 849,2397,2056,3081, 863,2359,3857,2096, 99,
|
||||
1397,1769,2300,4428,1643,3455,1978,1757,3718,1440, 35,4879,3742,1296,4228,2280,
|
||||
160,5063,1599,2013, 166, 520,3479,1646,3345,3012, 490,1937,1545,1264,2182,2505,
|
||||
1096,1188,1369,1436,2421,1667,2792,2460,1270,2122, 727,3167,2143, 806,1706,1012,
|
||||
1800,3037, 960,2218,1882, 805, 139,2456,1139,1521, 851,1052,3093,3089, 342,2039,
|
||||
744,5097,1468,1502,1585,2087, 223, 939, 326,2140,2577, 892,2481,1623,4077, 982,
|
||||
3708, 135,2131, 87,2503,3114,2326,1106, 876,1616, 547,2997,2831,2093,3441,4530,
|
||||
4314, 9,3256,4229,4148, 659,1462,1986,1710,2046,2913,2231,4090,4880,5255,3392,
|
||||
3274,1368,3689,4645,1477, 705,3384,3635,1068,1529,2941,1458,3782,1509, 100,1656,
|
||||
2548, 718,2339, 408,1590,2780,3548,1838,4117,3719,1345,3530, 717,3442,2778,3220,
|
||||
2898,1892,4590,3614,3371,2043,1998,1224,3483, 891, 635, 584,2559,3355, 733,1766,
|
||||
1729,1172,3789,1891,2307, 781,2982,2271,1957,1580,5773,2633,2005,4195,3097,1535,
|
||||
3213,1189,1934,5693,3262, 586,3118,1324,1598, 517,1564,2217,1868,1893,4445,3728,
|
||||
2703,3139,1526,1787,1992,3882,2875,1549,1199,1056,2224,1904,2711,5098,4287, 338,
|
||||
1993,3129,3489,2689,1809,2815,1997, 957,1855,3898,2550,3275,3057,1105,1319, 627,
|
||||
1505,1911,1883,3526, 698,3629,3456,1833,1431, 746, 77,1261,2017,2296,1977,1885,
|
||||
125,1334,1600, 525,1798,1109,2222,1470,1945, 559,2236,1186,3443,2476,1929,1411,
|
||||
2411,3135,1777,3372,2621,1841,1613,3229, 668,1430,1839,2643,2916, 195,1989,2671,
|
||||
2358,1387, 629,3205,2293,5256,4439, 123,1310, 888,1879,4300,3021,3605,1003,1162,
|
||||
3192,2910,2010, 140,2395,2859, 55,1082,2012,2901, 662, 419,2081,1438, 680,2774,
|
||||
4654,3912,1620,1731,1625,5035,4065,2328, 512,1344, 802,5443,2163,2311,2537, 524,
|
||||
3399, 98,1155,2103,1918,2606,3925,2816,1393,2465,1504,3773,2177,3963,1478,4346,
|
||||
180,1113,4655,3461,2028,1698, 833,2696,1235,1322,1594,4408,3623,3013,3225,2040,
|
||||
3022, 541,2881, 607,3632,2029,1665,1219, 639,1385,1686,1099,2803,3231,1938,3188,
|
||||
2858, 427, 676,2772,1168,2025, 454,3253,2486,3556, 230,1950, 580, 791,1991,1280,
|
||||
1086,1974,2034, 630, 257,3338,2788,4903,1017, 86,4790, 966,2789,1995,1696,1131,
|
||||
259,3095,4188,1308, 179,1463,5257, 289,4107,1248, 42,3413,1725,2288, 896,1947,
|
||||
774,4474,4254, 604,3430,4264, 392,2514,2588, 452, 237,1408,3018, 988,4531,1970,
|
||||
3034,3310, 540,2370,1562,1288,2990, 502,4765,1147, 4,1853,2708, 207, 294,2814,
|
||||
4078,2902,2509, 684, 34,3105,3532,2551, 644, 709,2801,2344, 573,1727,3573,3557,
|
||||
2021,1081,3100,4315,2100,3681, 199,2263,1837,2385, 146,3484,1195,2776,3949, 997,
|
||||
1939,3973,1008,1091,1202,1962,1847,1149,4209,5444,1076, 493, 117,5400,2521, 972,
|
||||
1490,2934,1796,4542,2374,1512,2933,2657, 413,2888,1135,2762,2314,2156,1355,2369,
|
||||
766,2007,2527,2170,3124,2491,2593,2632,4757,2437, 234,3125,3591,1898,1750,1376,
|
||||
1942,3468,3138, 570,2127,2145,3276,4131, 962, 132,1445,4196, 19, 941,3624,3480,
|
||||
3366,1973,1374,4461,3431,2629, 283,2415,2275, 808,2887,3620,2112,2563,1353,3610,
|
||||
955,1089,3103,1053, 96, 88,4097, 823,3808,1583, 399, 292,4091,3313, 421,1128,
|
||||
642,4006, 903,2539,1877,2082, 596, 29,4066,1790, 722,2157, 130, 995,1569, 769,
|
||||
1485, 464, 513,2213, 288,1923,1101,2453,4316, 133, 486,2445, 50, 625, 487,2207,
|
||||
57, 423, 481,2962, 159,3729,1558, 491, 303, 482, 501, 240,2837, 112,3648,2392,
|
||||
1783, 362, 8,3433,3422, 610,2793,3277,1390,1284,1654, 21,3823, 734, 367, 623,
|
||||
193, 287, 374,1009,1483, 816, 476, 313,2255,2340,1262,2150,2899,1146,2581, 782,
|
||||
2116,1659,2018,1880, 255,3586,3314,1110,2867,2137,2564, 986,2767,5185,2006, 650,
|
||||
158, 926, 762, 881,3157,2717,2362,3587, 306,3690,3245,1542,3077,2427,1691,2478,
|
||||
2118,2985,3490,2438, 539,2305, 983, 129,1754, 355,4201,2386, 827,2923, 104,1773,
|
||||
2838,2771, 411,2905,3919, 376, 767, 122,1114, 828,2422,1817,3506, 266,3460,1007,
|
||||
1609,4998, 945,2612,4429,2274, 726,1247,1964,2914,2199,2070,4002,4108, 657,3323,
|
||||
1422, 579, 455,2764,4737,1222,2895,1670, 824,1223,1487,2525, 558, 861,3080, 598,
|
||||
2659,2515,1967, 752,2583,2376,2214,4180, 977, 704,2464,4999,2622,4109,1210,2961,
|
||||
819,1541, 142,2284, 44, 418, 457,1126,3730,4347,4626,1644,1876,3671,1864, 302,
|
||||
1063,5694, 624, 723,1984,3745,1314,1676,2488,1610,1449,3558,3569,2166,2098, 409,
|
||||
1011,2325,3704,2306, 818,1732,1383,1824,1844,3757, 999,2705,3497,1216,1423,2683,
|
||||
2426,2954,2501,2726,2229,1475,2554,5064,1971,1794,1666,2014,1343, 783, 724, 191,
|
||||
2434,1354,2220,5065,1763,2752,2472,4152, 131, 175,2885,3434, 92,1466,4920,2616,
|
||||
3871,3872,3866, 128,1551,1632, 669,1854,3682,4691,4125,1230, 188,2973,3290,1302,
|
||||
1213, 560,3266, 917, 763,3909,3249,1760, 868,1958, 764,1782,2097, 145,2277,3774,
|
||||
4462, 64,1491,3062, 971,2132,3606,2442, 221,1226,1617, 218, 323,1185,3207,3147,
|
||||
571, 619,1473,1005,1744,2281, 449,1887,2396,3685, 275, 375,3816,1743,3844,3731,
|
||||
845,1983,2350,4210,1377, 773, 967,3499,3052,3743,2725,4007,1697,1022,3943,1464,
|
||||
3264,2855,2722,1952,1029,2839,2467, 84,4383,2215, 820,1391,2015,2448,3672, 377,
|
||||
1948,2168, 797,2545,3536,2578,2645, 94,2874,1678, 405,1259,3071, 771, 546,1315,
|
||||
470,1243,3083, 895,2468, 981, 969,2037, 846,4181, 653,1276,2928, 14,2594, 557,
|
||||
3007,2474, 156, 902,1338,1740,2574, 537,2518, 973,2282,2216,2433,1928, 138,2903,
|
||||
1293,2631,1612, 646,3457, 839,2935, 111, 496,2191,2847, 589,3186, 149,3994,2060,
|
||||
4031,2641,4067,3145,1870, 37,3597,2136,1025,2051,3009,3383,3549,1121,1016,3261,
|
||||
1301, 251,2446,2599,2153, 872,3246, 637, 334,3705, 831, 884, 921,3065,3140,4092,
|
||||
2198,1944, 246,2964, 108,2045,1152,1921,2308,1031, 203,3173,4170,1907,3890, 810,
|
||||
1401,2003,1690, 506, 647,1242,2828,1761,1649,3208,2249,1589,3709,2931,5156,1708,
|
||||
498, 666,2613, 834,3817,1231, 184,2851,1124, 883,3197,2261,3710,1765,1553,2658,
|
||||
1178,2639,2351, 93,1193, 942,2538,2141,4402, 235,1821, 870,1591,2192,1709,1871,
|
||||
3341,1618,4126,2595,2334, 603, 651, 69, 701, 268,2662,3411,2555,1380,1606, 503,
|
||||
448, 254,2371,2646, 574,1187,2309,1770, 322,2235,1292,1801, 305, 566,1133, 229,
|
||||
2067,2057, 706, 167, 483,2002,2672,3295,1820,3561,3067, 316, 378,2746,3452,1112,
|
||||
136,1981, 507,1651,2917,1117, 285,4591, 182,2580,3522,1304, 335,3303,1835,2504,
|
||||
1795,1792,2248, 674,1018,2106,2449,1857,2292,2845, 976,3047,1781,2600,2727,1389,
|
||||
1281, 52,3152, 153, 265,3950, 672,3485,3951,4463, 430,1183, 365, 278,2169, 27,
|
||||
1407,1336,2304, 209,1340,1730,2202,1852,2403,2883, 979,1737,1062, 631,2829,2542,
|
||||
3876,2592, 825,2086,2226,3048,3625, 352,1417,3724, 542, 991, 431,1351,3938,1861,
|
||||
2294, 826,1361,2927,3142,3503,1738, 463,2462,2723, 582,1916,1595,2808, 400,3845,
|
||||
3891,2868,3621,2254, 58,2492,1123, 910,2160,2614,1372,1603,1196,1072,3385,1700,
|
||||
3267,1980, 696, 480,2430, 920, 799,1570,2920,1951,2041,4047,2540,1321,4223,2469,
|
||||
3562,2228,1271,2602, 401,2833,3351,2575,5157, 907,2312,1256, 410, 263,3507,1582,
|
||||
996, 678,1849,2316,1480, 908,3545,2237, 703,2322, 667,1826,2849,1531,2604,2999,
|
||||
2407,3146,2151,2630,1786,3711, 469,3542, 497,3899,2409, 858, 837,4446,3393,1274,
|
||||
786, 620,1845,2001,3311, 484, 308,3367,1204,1815,3691,2332,1532,2557,1842,2020,
|
||||
2724,1927,2333,4440, 567, 22,1673,2728,4475,1987,1858,1144,1597, 101,1832,3601,
|
||||
12, 974,3783,4391, 951,1412, 1,3720, 453,4608,4041, 528,1041,1027,3230,2628,
|
||||
1129, 875,1051,3291,1203,2262,1069,2860,2799,2149,2615,3278, 144,1758,3040, 31,
|
||||
475,1680, 366,2685,3184, 311,1642,4008,2466,5036,1593,1493,2809, 216,1420,1668,
|
||||
233, 304,2128,3284, 232,1429,1768,1040,2008,3407,2740,2967,2543, 242,2133, 778,
|
||||
1565,2022,2620, 505,2189,2756,1098,2273, 372,1614, 708, 553,2846,2094,2278, 169,
|
||||
3626,2835,4161, 228,2674,3165, 809,1454,1309, 466,1705,1095, 900,3423, 880,2667,
|
||||
3751,5258,2317,3109,2571,4317,2766,1503,1342, 866,4447,1118, 63,2076, 314,1881,
|
||||
1348,1061, 172, 978,3515,1747, 532, 511,3970, 6, 601, 905,2699,3300,1751, 276,
|
||||
1467,3725,2668, 65,4239,2544,2779,2556,1604, 578,2451,1802, 992,2331,2624,1320,
|
||||
3446, 713,1513,1013, 103,2786,2447,1661, 886,1702, 916, 654,3574,2031,1556, 751,
|
||||
2178,2821,2179,1498,1538,2176, 271, 914,2251,2080,1325, 638,1953,2937,3877,2432,
|
||||
2754, 95,3265,1716, 260,1227,4083, 775, 106,1357,3254, 426,1607, 555,2480, 772,
|
||||
1985, 244,2546, 474, 495,1046,2611,1851,2061, 71,2089,1675,2590, 742,3758,2843,
|
||||
3222,1433, 267,2180,2576,2826,2233,2092,3913,2435, 956,1745,3075, 856,2113,1116,
|
||||
451, 3,1988,2896,1398, 993,2463,1878,2049,1341,2718,2721,2870,2108, 712,2904,
|
||||
4363,2753,2324, 277,2872,2349,2649, 384, 987, 435, 691,3000, 922, 164,3939, 652,
|
||||
1500,1184,4153,2482,3373,2165,4848,2335,3775,3508,3154,2806,2830,1554,2102,1664,
|
||||
2530,1434,2408, 893,1547,2623,3447,2832,2242,2532,3169,2856,3223,2078, 49,3770,
|
||||
3469, 462, 318, 656,2259,3250,3069, 679,1629,2758, 344,1138,1104,3120,1836,1283,
|
||||
3115,2154,1437,4448, 934, 759,1999, 794,2862,1038, 533,2560,1722,2342, 855,2626,
|
||||
1197,1663,4476,3127, 85,4240,2528, 25,1111,1181,3673, 407,3470,4561,2679,2713,
|
||||
768,1925,2841,3986,1544,1165, 932, 373,1240,2146,1930,2673, 721,4766, 354,4333,
|
||||
391,2963, 187, 61,3364,1442,1102, 330,1940,1767, 341,3809,4118, 393,2496,2062,
|
||||
2211, 105, 331, 300, 439, 913,1332, 626, 379,3304,1557, 328, 689,3952, 309,1555,
|
||||
931, 317,2517,3027, 325, 569, 686,2107,3084, 60,1042,1333,2794, 264,3177,4014,
|
||||
1628, 258,3712, 7,4464,1176,1043,1778, 683, 114,1975, 78,1492, 383,1886, 510,
|
||||
386, 645,5291,2891,2069,3305,4138,3867,2939,2603,2493,1935,1066,1848,3588,1015,
|
||||
1282,1289,4609, 697,1453,3044,2666,3611,1856,2412, 54, 719,1330, 568,3778,2459,
|
||||
1748, 788, 492, 551,1191,1000, 488,3394,3763, 282,1799, 348,2016,1523,3155,2390,
|
||||
1049, 382,2019,1788,1170, 729,2968,3523, 897,3926,2785,2938,3292, 350,2319,3238,
|
||||
1718,1717,2655,3453,3143,4465, 161,2889,2980,2009,1421, 56,1908,1640,2387,2232,
|
||||
1917,1874,2477,4921, 148, 83,3438, 592,4245,2882,1822,1055, 741, 115,1496,1624,
|
||||
381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189,
|
||||
852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, #last 512
|
||||
)
|
||||
|
||||
46
venv/Lib/site-packages/pip/_vendor/chardet/gb2312prober.py
Normal file
46
venv/Lib/site-packages/pip/_vendor/chardet/gb2312prober.py
Normal file
@@ -0,0 +1,46 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import GB2312DistributionAnalysis
|
||||
from .mbcssm import GB2312_SM_MODEL
|
||||
|
||||
class GB2312Prober(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(GB2312Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
|
||||
self.distribution_analyzer = GB2312DistributionAnalysis()
|
||||
self.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "GB2312"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Chinese"
|
||||
292
venv/Lib/site-packages/pip/_vendor/chardet/hebrewprober.py
Normal file
292
venv/Lib/site-packages/pip/_vendor/chardet/hebrewprober.py
Normal file
@@ -0,0 +1,292 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Shy Shalom
|
||||
# Portions created by the Initial Developer are Copyright (C) 2005
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState
|
||||
|
||||
# This prober doesn't actually recognize a language or a charset.
|
||||
# It is a helper prober for the use of the Hebrew model probers
|
||||
|
||||
### General ideas of the Hebrew charset recognition ###
|
||||
#
|
||||
# Four main charsets exist in Hebrew:
|
||||
# "ISO-8859-8" - Visual Hebrew
|
||||
# "windows-1255" - Logical Hebrew
|
||||
# "ISO-8859-8-I" - Logical Hebrew
|
||||
# "x-mac-hebrew" - ?? Logical Hebrew ??
|
||||
#
|
||||
# Both "ISO" charsets use a completely identical set of code points, whereas
|
||||
# "windows-1255" and "x-mac-hebrew" are two different proper supersets of
|
||||
# these code points. windows-1255 defines additional characters in the range
|
||||
# 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
|
||||
# diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
|
||||
# x-mac-hebrew defines similar additional code points but with a different
|
||||
# mapping.
|
||||
#
|
||||
# As far as an average Hebrew text with no diacritics is concerned, all four
|
||||
# charsets are identical with respect to code points. Meaning that for the
|
||||
# main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
|
||||
# (including final letters).
|
||||
#
|
||||
# The dominant difference between these charsets is their directionality.
|
||||
# "Visual" directionality means that the text is ordered as if the renderer is
|
||||
# not aware of a BIDI rendering algorithm. The renderer sees the text and
|
||||
# draws it from left to right. The text itself when ordered naturally is read
|
||||
# backwards. A buffer of Visual Hebrew generally looks like so:
|
||||
# "[last word of first line spelled backwards] [whole line ordered backwards
|
||||
# and spelled backwards] [first word of first line spelled backwards]
|
||||
# [end of line] [last word of second line] ... etc' "
|
||||
# adding punctuation marks, numbers and English text to visual text is
|
||||
# naturally also "visual" and from left to right.
|
||||
#
|
||||
# "Logical" directionality means the text is ordered "naturally" according to
|
||||
# the order it is read. It is the responsibility of the renderer to display
|
||||
# the text from right to left. A BIDI algorithm is used to place general
|
||||
# punctuation marks, numbers and English text in the text.
|
||||
#
|
||||
# Texts in x-mac-hebrew are almost impossible to find on the Internet. From
|
||||
# what little evidence I could find, it seems that its general directionality
|
||||
# is Logical.
|
||||
#
|
||||
# To sum up all of the above, the Hebrew probing mechanism knows about two
|
||||
# charsets:
|
||||
# Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
|
||||
# backwards while line order is natural. For charset recognition purposes
|
||||
# the line order is unimportant (In fact, for this implementation, even
|
||||
# word order is unimportant).
|
||||
# Logical Hebrew - "windows-1255" - normal, naturally ordered text.
|
||||
#
|
||||
# "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
|
||||
# specifically identified.
|
||||
# "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
|
||||
# that contain special punctuation marks or diacritics is displayed with
|
||||
# some unconverted characters showing as question marks. This problem might
|
||||
# be corrected using another model prober for x-mac-hebrew. Due to the fact
|
||||
# that x-mac-hebrew texts are so rare, writing another model prober isn't
|
||||
# worth the effort and performance hit.
|
||||
#
|
||||
#### The Prober ####
|
||||
#
|
||||
# The prober is divided between two SBCharSetProbers and a HebrewProber,
|
||||
# all of which are managed, created, fed data, inquired and deleted by the
|
||||
# SBCSGroupProber. The two SBCharSetProbers identify that the text is in
|
||||
# fact some kind of Hebrew, Logical or Visual. The final decision about which
|
||||
# one is it is made by the HebrewProber by combining final-letter scores
|
||||
# with the scores of the two SBCharSetProbers to produce a final answer.
|
||||
#
|
||||
# The SBCSGroupProber is responsible for stripping the original text of HTML
|
||||
# tags, English characters, numbers, low-ASCII punctuation characters, spaces
|
||||
# and new lines. It reduces any sequence of such characters to a single space.
|
||||
# The buffer fed to each prober in the SBCS group prober is pure text in
|
||||
# high-ASCII.
|
||||
# The two SBCharSetProbers (model probers) share the same language model:
|
||||
# Win1255Model.
|
||||
# The first SBCharSetProber uses the model normally as any other
|
||||
# SBCharSetProber does, to recognize windows-1255, upon which this model was
|
||||
# built. The second SBCharSetProber is told to make the pair-of-letter
|
||||
# lookup in the language model backwards. This in practice exactly simulates
|
||||
# a visual Hebrew model using the windows-1255 logical Hebrew model.
|
||||
#
|
||||
# The HebrewProber is not using any language model. All it does is look for
|
||||
# final-letter evidence suggesting the text is either logical Hebrew or visual
|
||||
# Hebrew. Disjointed from the model probers, the results of the HebrewProber
|
||||
# alone are meaningless. HebrewProber always returns 0.00 as confidence
|
||||
# since it never identifies a charset by itself. Instead, the pointer to the
|
||||
# HebrewProber is passed to the model probers as a helper "Name Prober".
|
||||
# When the Group prober receives a positive identification from any prober,
|
||||
# it asks for the name of the charset identified. If the prober queried is a
|
||||
# Hebrew model prober, the model prober forwards the call to the
|
||||
# HebrewProber to make the final decision. In the HebrewProber, the
|
||||
# decision is made according to the final-letters scores maintained and Both
|
||||
# model probers scores. The answer is returned in the form of the name of the
|
||||
# charset identified, either "windows-1255" or "ISO-8859-8".
|
||||
|
||||
class HebrewProber(CharSetProber):
|
||||
# windows-1255 / ISO-8859-8 code points of interest
|
||||
FINAL_KAF = 0xea
|
||||
NORMAL_KAF = 0xeb
|
||||
FINAL_MEM = 0xed
|
||||
NORMAL_MEM = 0xee
|
||||
FINAL_NUN = 0xef
|
||||
NORMAL_NUN = 0xf0
|
||||
FINAL_PE = 0xf3
|
||||
NORMAL_PE = 0xf4
|
||||
FINAL_TSADI = 0xf5
|
||||
NORMAL_TSADI = 0xf6
|
||||
|
||||
# Minimum Visual vs Logical final letter score difference.
|
||||
# If the difference is below this, don't rely solely on the final letter score
|
||||
# distance.
|
||||
MIN_FINAL_CHAR_DISTANCE = 5
|
||||
|
||||
# Minimum Visual vs Logical model score difference.
|
||||
# If the difference is below this, don't rely at all on the model score
|
||||
# distance.
|
||||
MIN_MODEL_DISTANCE = 0.01
|
||||
|
||||
VISUAL_HEBREW_NAME = "ISO-8859-8"
|
||||
LOGICAL_HEBREW_NAME = "windows-1255"
|
||||
|
||||
def __init__(self):
|
||||
super(HebrewProber, self).__init__()
|
||||
self._final_char_logical_score = None
|
||||
self._final_char_visual_score = None
|
||||
self._prev = None
|
||||
self._before_prev = None
|
||||
self._logical_prober = None
|
||||
self._visual_prober = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._final_char_logical_score = 0
|
||||
self._final_char_visual_score = 0
|
||||
# The two last characters seen in the previous buffer,
|
||||
# mPrev and mBeforePrev are initialized to space in order to simulate
|
||||
# a word delimiter at the beginning of the data
|
||||
self._prev = ' '
|
||||
self._before_prev = ' '
|
||||
# These probers are owned by the group prober.
|
||||
|
||||
def set_model_probers(self, logicalProber, visualProber):
|
||||
self._logical_prober = logicalProber
|
||||
self._visual_prober = visualProber
|
||||
|
||||
def is_final(self, c):
|
||||
return c in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN,
|
||||
self.FINAL_PE, self.FINAL_TSADI]
|
||||
|
||||
def is_non_final(self, c):
|
||||
# The normal Tsadi is not a good Non-Final letter due to words like
|
||||
# 'lechotet' (to chat) containing an apostrophe after the tsadi. This
|
||||
# apostrophe is converted to a space in FilterWithoutEnglishLetters
|
||||
# causing the Non-Final tsadi to appear at an end of a word even
|
||||
# though this is not the case in the original text.
|
||||
# The letters Pe and Kaf rarely display a related behavior of not being
|
||||
# a good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak'
|
||||
# for example legally end with a Non-Final Pe or Kaf. However, the
|
||||
# benefit of these letters as Non-Final letters outweighs the damage
|
||||
# since these words are quite rare.
|
||||
return c in [self.NORMAL_KAF, self.NORMAL_MEM,
|
||||
self.NORMAL_NUN, self.NORMAL_PE]
|
||||
|
||||
def feed(self, byte_str):
|
||||
# Final letter analysis for logical-visual decision.
|
||||
# Look for evidence that the received buffer is either logical Hebrew
|
||||
# or visual Hebrew.
|
||||
# The following cases are checked:
|
||||
# 1) A word longer than 1 letter, ending with a final letter. This is
|
||||
# an indication that the text is laid out "naturally" since the
|
||||
# final letter really appears at the end. +1 for logical score.
|
||||
# 2) A word longer than 1 letter, ending with a Non-Final letter. In
|
||||
# normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi,
|
||||
# should not end with the Non-Final form of that letter. Exceptions
|
||||
# to this rule are mentioned above in isNonFinal(). This is an
|
||||
# indication that the text is laid out backwards. +1 for visual
|
||||
# score
|
||||
# 3) A word longer than 1 letter, starting with a final letter. Final
|
||||
# letters should not appear at the beginning of a word. This is an
|
||||
# indication that the text is laid out backwards. +1 for visual
|
||||
# score.
|
||||
#
|
||||
# The visual score and logical score are accumulated throughout the
|
||||
# text and are finally checked against each other in GetCharSetName().
|
||||
# No checking for final letters in the middle of words is done since
|
||||
# that case is not an indication for either Logical or Visual text.
|
||||
#
|
||||
# We automatically filter out all 7-bit characters (replace them with
|
||||
# spaces) so the word boundary detection works properly. [MAP]
|
||||
|
||||
if self.state == ProbingState.NOT_ME:
|
||||
# Both model probers say it's not them. No reason to continue.
|
||||
return ProbingState.NOT_ME
|
||||
|
||||
byte_str = self.filter_high_byte_only(byte_str)
|
||||
|
||||
for cur in byte_str:
|
||||
if cur == ' ':
|
||||
# We stand on a space - a word just ended
|
||||
if self._before_prev != ' ':
|
||||
# next-to-last char was not a space so self._prev is not a
|
||||
# 1 letter word
|
||||
if self.is_final(self._prev):
|
||||
# case (1) [-2:not space][-1:final letter][cur:space]
|
||||
self._final_char_logical_score += 1
|
||||
elif self.is_non_final(self._prev):
|
||||
# case (2) [-2:not space][-1:Non-Final letter][
|
||||
# cur:space]
|
||||
self._final_char_visual_score += 1
|
||||
else:
|
||||
# Not standing on a space
|
||||
if ((self._before_prev == ' ') and
|
||||
(self.is_final(self._prev)) and (cur != ' ')):
|
||||
# case (3) [-2:space][-1:final letter][cur:not space]
|
||||
self._final_char_visual_score += 1
|
||||
self._before_prev = self._prev
|
||||
self._prev = cur
|
||||
|
||||
# Forever detecting, till the end or until both model probers return
|
||||
# ProbingState.NOT_ME (handled above)
|
||||
return ProbingState.DETECTING
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
# Make the decision: is it Logical or Visual?
|
||||
# If the final letter score distance is dominant enough, rely on it.
|
||||
finalsub = self._final_char_logical_score - self._final_char_visual_score
|
||||
if finalsub >= self.MIN_FINAL_CHAR_DISTANCE:
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
if finalsub <= -self.MIN_FINAL_CHAR_DISTANCE:
|
||||
return self.VISUAL_HEBREW_NAME
|
||||
|
||||
# It's not dominant enough, try to rely on the model scores instead.
|
||||
modelsub = (self._logical_prober.get_confidence()
|
||||
- self._visual_prober.get_confidence())
|
||||
if modelsub > self.MIN_MODEL_DISTANCE:
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
if modelsub < -self.MIN_MODEL_DISTANCE:
|
||||
return self.VISUAL_HEBREW_NAME
|
||||
|
||||
# Still no good, back to final letter distance, maybe it'll save the
|
||||
# day.
|
||||
if finalsub < 0.0:
|
||||
return self.VISUAL_HEBREW_NAME
|
||||
|
||||
# (finalsub > 0 - Logical) or (don't know what to do) default to
|
||||
# Logical.
|
||||
return self.LOGICAL_HEBREW_NAME
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return 'Hebrew'
|
||||
|
||||
@property
|
||||
def state(self):
|
||||
# Remain active as long as any of the model probers are active.
|
||||
if (self._logical_prober.state == ProbingState.NOT_ME) and \
|
||||
(self._visual_prober.state == ProbingState.NOT_ME):
|
||||
return ProbingState.NOT_ME
|
||||
return ProbingState.DETECTING
|
||||
325
venv/Lib/site-packages/pip/_vendor/chardet/jisfreq.py
Normal file
325
venv/Lib/site-packages/pip/_vendor/chardet/jisfreq.py
Normal file
@@ -0,0 +1,325 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
# Sampling from about 20M text materials include literature and computer technology
|
||||
#
|
||||
# Japanese frequency table, applied to both S-JIS and EUC-JP
|
||||
# They are sorted in order.
|
||||
|
||||
# 128 --> 0.77094
|
||||
# 256 --> 0.85710
|
||||
# 512 --> 0.92635
|
||||
# 1024 --> 0.97130
|
||||
# 2048 --> 0.99431
|
||||
#
|
||||
# Ideal Distribution Ratio = 0.92635 / (1-0.92635) = 12.58
|
||||
# Random Distribution Ration = 512 / (2965+62+83+86-512) = 0.191
|
||||
#
|
||||
# Typical Distribution Ratio, 25% of IDR
|
||||
|
||||
JIS_TYPICAL_DISTRIBUTION_RATIO = 3.0
|
||||
|
||||
# Char to FreqOrder table ,
|
||||
JIS_TABLE_SIZE = 4368
|
||||
|
||||
JIS_CHAR_TO_FREQ_ORDER = (
|
||||
40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16
|
||||
3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32
|
||||
1262,5072, 619, 127,3431,3512,3230,1899,1700, 232, 228,1294,1298, 284, 283,2041, # 48
|
||||
2042,1061,1062, 48, 49, 44, 45, 433, 434,1040,1041, 996, 787,2997,1255,4305, # 64
|
||||
2108,4609,1684,1648,5073,5074,5075,5076,5077,5078,3687,5079,4610,5080,3927,3928, # 80
|
||||
5081,3296,3432, 290,2285,1471,2187,5082,2580,2825,1303,2140,1739,1445,2691,3375, # 96
|
||||
1691,3297,4306,4307,4611, 452,3376,1182,2713,3688,3069,4308,5083,5084,5085,5086, # 112
|
||||
5087,5088,5089,5090,5091,5092,5093,5094,5095,5096,5097,5098,5099,5100,5101,5102, # 128
|
||||
5103,5104,5105,5106,5107,5108,5109,5110,5111,5112,4097,5113,5114,5115,5116,5117, # 144
|
||||
5118,5119,5120,5121,5122,5123,5124,5125,5126,5127,5128,5129,5130,5131,5132,5133, # 160
|
||||
5134,5135,5136,5137,5138,5139,5140,5141,5142,5143,5144,5145,5146,5147,5148,5149, # 176
|
||||
5150,5151,5152,4612,5153,5154,5155,5156,5157,5158,5159,5160,5161,5162,5163,5164, # 192
|
||||
5165,5166,5167,5168,5169,5170,5171,5172,5173,5174,5175,1472, 598, 618, 820,1205, # 208
|
||||
1309,1412,1858,1307,1692,5176,5177,5178,5179,5180,5181,5182,1142,1452,1234,1172, # 224
|
||||
1875,2043,2149,1793,1382,2973, 925,2404,1067,1241, 960,1377,2935,1491, 919,1217, # 240
|
||||
1865,2030,1406,1499,2749,4098,5183,5184,5185,5186,5187,5188,2561,4099,3117,1804, # 256
|
||||
2049,3689,4309,3513,1663,5189,3166,3118,3298,1587,1561,3433,5190,3119,1625,2998, # 272
|
||||
3299,4613,1766,3690,2786,4614,5191,5192,5193,5194,2161, 26,3377, 2,3929, 20, # 288
|
||||
3691, 47,4100, 50, 17, 16, 35, 268, 27, 243, 42, 155, 24, 154, 29, 184, # 304
|
||||
4, 91, 14, 92, 53, 396, 33, 289, 9, 37, 64, 620, 21, 39, 321, 5, # 320
|
||||
12, 11, 52, 13, 3, 208, 138, 0, 7, 60, 526, 141, 151,1069, 181, 275, # 336
|
||||
1591, 83, 132,1475, 126, 331, 829, 15, 69, 160, 59, 22, 157, 55,1079, 312, # 352
|
||||
109, 38, 23, 25, 10, 19, 79,5195, 61, 382,1124, 8, 30,5196,5197,5198, # 368
|
||||
5199,5200,5201,5202,5203,5204,5205,5206, 89, 62, 74, 34,2416, 112, 139, 196, # 384
|
||||
271, 149, 84, 607, 131, 765, 46, 88, 153, 683, 76, 874, 101, 258, 57, 80, # 400
|
||||
32, 364, 121,1508, 169,1547, 68, 235, 145,2999, 41, 360,3027, 70, 63, 31, # 416
|
||||
43, 259, 262,1383, 99, 533, 194, 66, 93, 846, 217, 192, 56, 106, 58, 565, # 432
|
||||
280, 272, 311, 256, 146, 82, 308, 71, 100, 128, 214, 655, 110, 261, 104,1140, # 448
|
||||
54, 51, 36, 87, 67,3070, 185,2618,2936,2020, 28,1066,2390,2059,5207,5208, # 464
|
||||
5209,5210,5211,5212,5213,5214,5215,5216,4615,5217,5218,5219,5220,5221,5222,5223, # 480
|
||||
5224,5225,5226,5227,5228,5229,5230,5231,5232,5233,5234,5235,5236,3514,5237,5238, # 496
|
||||
5239,5240,5241,5242,5243,5244,2297,2031,4616,4310,3692,5245,3071,5246,3598,5247, # 512
|
||||
4617,3231,3515,5248,4101,4311,4618,3808,4312,4102,5249,4103,4104,3599,5250,5251, # 528
|
||||
5252,5253,5254,5255,5256,5257,5258,5259,5260,5261,5262,5263,5264,5265,5266,5267, # 544
|
||||
5268,5269,5270,5271,5272,5273,5274,5275,5276,5277,5278,5279,5280,5281,5282,5283, # 560
|
||||
5284,5285,5286,5287,5288,5289,5290,5291,5292,5293,5294,5295,5296,5297,5298,5299, # 576
|
||||
5300,5301,5302,5303,5304,5305,5306,5307,5308,5309,5310,5311,5312,5313,5314,5315, # 592
|
||||
5316,5317,5318,5319,5320,5321,5322,5323,5324,5325,5326,5327,5328,5329,5330,5331, # 608
|
||||
5332,5333,5334,5335,5336,5337,5338,5339,5340,5341,5342,5343,5344,5345,5346,5347, # 624
|
||||
5348,5349,5350,5351,5352,5353,5354,5355,5356,5357,5358,5359,5360,5361,5362,5363, # 640
|
||||
5364,5365,5366,5367,5368,5369,5370,5371,5372,5373,5374,5375,5376,5377,5378,5379, # 656
|
||||
5380,5381, 363, 642,2787,2878,2788,2789,2316,3232,2317,3434,2011, 165,1942,3930, # 672
|
||||
3931,3932,3933,5382,4619,5383,4620,5384,5385,5386,5387,5388,5389,5390,5391,5392, # 688
|
||||
5393,5394,5395,5396,5397,5398,5399,5400,5401,5402,5403,5404,5405,5406,5407,5408, # 704
|
||||
5409,5410,5411,5412,5413,5414,5415,5416,5417,5418,5419,5420,5421,5422,5423,5424, # 720
|
||||
5425,5426,5427,5428,5429,5430,5431,5432,5433,5434,5435,5436,5437,5438,5439,5440, # 736
|
||||
5441,5442,5443,5444,5445,5446,5447,5448,5449,5450,5451,5452,5453,5454,5455,5456, # 752
|
||||
5457,5458,5459,5460,5461,5462,5463,5464,5465,5466,5467,5468,5469,5470,5471,5472, # 768
|
||||
5473,5474,5475,5476,5477,5478,5479,5480,5481,5482,5483,5484,5485,5486,5487,5488, # 784
|
||||
5489,5490,5491,5492,5493,5494,5495,5496,5497,5498,5499,5500,5501,5502,5503,5504, # 800
|
||||
5505,5506,5507,5508,5509,5510,5511,5512,5513,5514,5515,5516,5517,5518,5519,5520, # 816
|
||||
5521,5522,5523,5524,5525,5526,5527,5528,5529,5530,5531,5532,5533,5534,5535,5536, # 832
|
||||
5537,5538,5539,5540,5541,5542,5543,5544,5545,5546,5547,5548,5549,5550,5551,5552, # 848
|
||||
5553,5554,5555,5556,5557,5558,5559,5560,5561,5562,5563,5564,5565,5566,5567,5568, # 864
|
||||
5569,5570,5571,5572,5573,5574,5575,5576,5577,5578,5579,5580,5581,5582,5583,5584, # 880
|
||||
5585,5586,5587,5588,5589,5590,5591,5592,5593,5594,5595,5596,5597,5598,5599,5600, # 896
|
||||
5601,5602,5603,5604,5605,5606,5607,5608,5609,5610,5611,5612,5613,5614,5615,5616, # 912
|
||||
5617,5618,5619,5620,5621,5622,5623,5624,5625,5626,5627,5628,5629,5630,5631,5632, # 928
|
||||
5633,5634,5635,5636,5637,5638,5639,5640,5641,5642,5643,5644,5645,5646,5647,5648, # 944
|
||||
5649,5650,5651,5652,5653,5654,5655,5656,5657,5658,5659,5660,5661,5662,5663,5664, # 960
|
||||
5665,5666,5667,5668,5669,5670,5671,5672,5673,5674,5675,5676,5677,5678,5679,5680, # 976
|
||||
5681,5682,5683,5684,5685,5686,5687,5688,5689,5690,5691,5692,5693,5694,5695,5696, # 992
|
||||
5697,5698,5699,5700,5701,5702,5703,5704,5705,5706,5707,5708,5709,5710,5711,5712, # 1008
|
||||
5713,5714,5715,5716,5717,5718,5719,5720,5721,5722,5723,5724,5725,5726,5727,5728, # 1024
|
||||
5729,5730,5731,5732,5733,5734,5735,5736,5737,5738,5739,5740,5741,5742,5743,5744, # 1040
|
||||
5745,5746,5747,5748,5749,5750,5751,5752,5753,5754,5755,5756,5757,5758,5759,5760, # 1056
|
||||
5761,5762,5763,5764,5765,5766,5767,5768,5769,5770,5771,5772,5773,5774,5775,5776, # 1072
|
||||
5777,5778,5779,5780,5781,5782,5783,5784,5785,5786,5787,5788,5789,5790,5791,5792, # 1088
|
||||
5793,5794,5795,5796,5797,5798,5799,5800,5801,5802,5803,5804,5805,5806,5807,5808, # 1104
|
||||
5809,5810,5811,5812,5813,5814,5815,5816,5817,5818,5819,5820,5821,5822,5823,5824, # 1120
|
||||
5825,5826,5827,5828,5829,5830,5831,5832,5833,5834,5835,5836,5837,5838,5839,5840, # 1136
|
||||
5841,5842,5843,5844,5845,5846,5847,5848,5849,5850,5851,5852,5853,5854,5855,5856, # 1152
|
||||
5857,5858,5859,5860,5861,5862,5863,5864,5865,5866,5867,5868,5869,5870,5871,5872, # 1168
|
||||
5873,5874,5875,5876,5877,5878,5879,5880,5881,5882,5883,5884,5885,5886,5887,5888, # 1184
|
||||
5889,5890,5891,5892,5893,5894,5895,5896,5897,5898,5899,5900,5901,5902,5903,5904, # 1200
|
||||
5905,5906,5907,5908,5909,5910,5911,5912,5913,5914,5915,5916,5917,5918,5919,5920, # 1216
|
||||
5921,5922,5923,5924,5925,5926,5927,5928,5929,5930,5931,5932,5933,5934,5935,5936, # 1232
|
||||
5937,5938,5939,5940,5941,5942,5943,5944,5945,5946,5947,5948,5949,5950,5951,5952, # 1248
|
||||
5953,5954,5955,5956,5957,5958,5959,5960,5961,5962,5963,5964,5965,5966,5967,5968, # 1264
|
||||
5969,5970,5971,5972,5973,5974,5975,5976,5977,5978,5979,5980,5981,5982,5983,5984, # 1280
|
||||
5985,5986,5987,5988,5989,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999,6000, # 1296
|
||||
6001,6002,6003,6004,6005,6006,6007,6008,6009,6010,6011,6012,6013,6014,6015,6016, # 1312
|
||||
6017,6018,6019,6020,6021,6022,6023,6024,6025,6026,6027,6028,6029,6030,6031,6032, # 1328
|
||||
6033,6034,6035,6036,6037,6038,6039,6040,6041,6042,6043,6044,6045,6046,6047,6048, # 1344
|
||||
6049,6050,6051,6052,6053,6054,6055,6056,6057,6058,6059,6060,6061,6062,6063,6064, # 1360
|
||||
6065,6066,6067,6068,6069,6070,6071,6072,6073,6074,6075,6076,6077,6078,6079,6080, # 1376
|
||||
6081,6082,6083,6084,6085,6086,6087,6088,6089,6090,6091,6092,6093,6094,6095,6096, # 1392
|
||||
6097,6098,6099,6100,6101,6102,6103,6104,6105,6106,6107,6108,6109,6110,6111,6112, # 1408
|
||||
6113,6114,2044,2060,4621, 997,1235, 473,1186,4622, 920,3378,6115,6116, 379,1108, # 1424
|
||||
4313,2657,2735,3934,6117,3809, 636,3233, 573,1026,3693,3435,2974,3300,2298,4105, # 1440
|
||||
854,2937,2463, 393,2581,2417, 539, 752,1280,2750,2480, 140,1161, 440, 708,1569, # 1456
|
||||
665,2497,1746,1291,1523,3000, 164,1603, 847,1331, 537,1997, 486, 508,1693,2418, # 1472
|
||||
1970,2227, 878,1220, 299,1030, 969, 652,2751, 624,1137,3301,2619, 65,3302,2045, # 1488
|
||||
1761,1859,3120,1930,3694,3516, 663,1767, 852, 835,3695, 269, 767,2826,2339,1305, # 1504
|
||||
896,1150, 770,1616,6118, 506,1502,2075,1012,2519, 775,2520,2975,2340,2938,4314, # 1520
|
||||
3028,2086,1224,1943,2286,6119,3072,4315,2240,1273,1987,3935,1557, 175, 597, 985, # 1536
|
||||
3517,2419,2521,1416,3029, 585, 938,1931,1007,1052,1932,1685,6120,3379,4316,4623, # 1552
|
||||
804, 599,3121,1333,2128,2539,1159,1554,2032,3810, 687,2033,2904, 952, 675,1467, # 1568
|
||||
3436,6121,2241,1096,1786,2440,1543,1924, 980,1813,2228, 781,2692,1879, 728,1918, # 1584
|
||||
3696,4624, 548,1950,4625,1809,1088,1356,3303,2522,1944, 502, 972, 373, 513,2827, # 1600
|
||||
586,2377,2391,1003,1976,1631,6122,2464,1084, 648,1776,4626,2141, 324, 962,2012, # 1616
|
||||
2177,2076,1384, 742,2178,1448,1173,1810, 222, 102, 301, 445, 125,2420, 662,2498, # 1632
|
||||
277, 200,1476,1165,1068, 224,2562,1378,1446, 450,1880, 659, 791, 582,4627,2939, # 1648
|
||||
3936,1516,1274, 555,2099,3697,1020,1389,1526,3380,1762,1723,1787,2229, 412,2114, # 1664
|
||||
1900,2392,3518, 512,2597, 427,1925,2341,3122,1653,1686,2465,2499, 697, 330, 273, # 1680
|
||||
380,2162, 951, 832, 780, 991,1301,3073, 965,2270,3519, 668,2523,2636,1286, 535, # 1696
|
||||
1407, 518, 671, 957,2658,2378, 267, 611,2197,3030,6123, 248,2299, 967,1799,2356, # 1712
|
||||
850,1418,3437,1876,1256,1480,2828,1718,6124,6125,1755,1664,2405,6126,4628,2879, # 1728
|
||||
2829, 499,2179, 676,4629, 557,2329,2214,2090, 325,3234, 464, 811,3001, 992,2342, # 1744
|
||||
2481,1232,1469, 303,2242, 466,1070,2163, 603,1777,2091,4630,2752,4631,2714, 322, # 1760
|
||||
2659,1964,1768, 481,2188,1463,2330,2857,3600,2092,3031,2421,4632,2318,2070,1849, # 1776
|
||||
2598,4633,1302,2254,1668,1701,2422,3811,2905,3032,3123,2046,4106,1763,1694,4634, # 1792
|
||||
1604, 943,1724,1454, 917, 868,2215,1169,2940, 552,1145,1800,1228,1823,1955, 316, # 1808
|
||||
1080,2510, 361,1807,2830,4107,2660,3381,1346,1423,1134,4108,6127, 541,1263,1229, # 1824
|
||||
1148,2540, 545, 465,1833,2880,3438,1901,3074,2482, 816,3937, 713,1788,2500, 122, # 1840
|
||||
1575, 195,1451,2501,1111,6128, 859, 374,1225,2243,2483,4317, 390,1033,3439,3075, # 1856
|
||||
2524,1687, 266, 793,1440,2599, 946, 779, 802, 507, 897,1081, 528,2189,1292, 711, # 1872
|
||||
1866,1725,1167,1640, 753, 398,2661,1053, 246, 348,4318, 137,1024,3440,1600,2077, # 1888
|
||||
2129, 825,4319, 698, 238, 521, 187,2300,1157,2423,1641,1605,1464,1610,1097,2541, # 1904
|
||||
1260,1436, 759,2255,1814,2150, 705,3235, 409,2563,3304, 561,3033,2005,2564, 726, # 1920
|
||||
1956,2343,3698,4109, 949,3812,3813,3520,1669, 653,1379,2525, 881,2198, 632,2256, # 1936
|
||||
1027, 778,1074, 733,1957, 514,1481,2466, 554,2180, 702,3938,1606,1017,1398,6129, # 1952
|
||||
1380,3521, 921, 993,1313, 594, 449,1489,1617,1166, 768,1426,1360, 495,1794,3601, # 1968
|
||||
1177,3602,1170,4320,2344, 476, 425,3167,4635,3168,1424, 401,2662,1171,3382,1998, # 1984
|
||||
1089,4110, 477,3169, 474,6130,1909, 596,2831,1842, 494, 693,1051,1028,1207,3076, # 2000
|
||||
606,2115, 727,2790,1473,1115, 743,3522, 630, 805,1532,4321,2021, 366,1057, 838, # 2016
|
||||
684,1114,2142,4322,2050,1492,1892,1808,2271,3814,2424,1971,1447,1373,3305,1090, # 2032
|
||||
1536,3939,3523,3306,1455,2199, 336, 369,2331,1035, 584,2393, 902, 718,2600,6131, # 2048
|
||||
2753, 463,2151,1149,1611,2467, 715,1308,3124,1268, 343,1413,3236,1517,1347,2663, # 2064
|
||||
2093,3940,2022,1131,1553,2100,2941,1427,3441,2942,1323,2484,6132,1980, 872,2368, # 2080
|
||||
2441,2943, 320,2369,2116,1082, 679,1933,3941,2791,3815, 625,1143,2023, 422,2200, # 2096
|
||||
3816,6133, 730,1695, 356,2257,1626,2301,2858,2637,1627,1778, 937, 883,2906,2693, # 2112
|
||||
3002,1769,1086, 400,1063,1325,3307,2792,4111,3077, 456,2345,1046, 747,6134,1524, # 2128
|
||||
884,1094,3383,1474,2164,1059, 974,1688,2181,2258,1047, 345,1665,1187, 358, 875, # 2144
|
||||
3170, 305, 660,3524,2190,1334,1135,3171,1540,1649,2542,1527, 927, 968,2793, 885, # 2160
|
||||
1972,1850, 482, 500,2638,1218,1109,1085,2543,1654,2034, 876, 78,2287,1482,1277, # 2176
|
||||
861,1675,1083,1779, 724,2754, 454, 397,1132,1612,2332, 893, 672,1237, 257,2259, # 2192
|
||||
2370, 135,3384, 337,2244, 547, 352, 340, 709,2485,1400, 788,1138,2511, 540, 772, # 2208
|
||||
1682,2260,2272,2544,2013,1843,1902,4636,1999,1562,2288,4637,2201,1403,1533, 407, # 2224
|
||||
576,3308,1254,2071, 978,3385, 170, 136,1201,3125,2664,3172,2394, 213, 912, 873, # 2240
|
||||
3603,1713,2202, 699,3604,3699, 813,3442, 493, 531,1054, 468,2907,1483, 304, 281, # 2256
|
||||
4112,1726,1252,2094, 339,2319,2130,2639, 756,1563,2944, 748, 571,2976,1588,2425, # 2272
|
||||
2715,1851,1460,2426,1528,1392,1973,3237, 288,3309, 685,3386, 296, 892,2716,2216, # 2288
|
||||
1570,2245, 722,1747,2217, 905,3238,1103,6135,1893,1441,1965, 251,1805,2371,3700, # 2304
|
||||
2601,1919,1078, 75,2182,1509,1592,1270,2640,4638,2152,6136,3310,3817, 524, 706, # 2320
|
||||
1075, 292,3818,1756,2602, 317, 98,3173,3605,3525,1844,2218,3819,2502, 814, 567, # 2336
|
||||
385,2908,1534,6137, 534,1642,3239, 797,6138,1670,1529, 953,4323, 188,1071, 538, # 2352
|
||||
178, 729,3240,2109,1226,1374,2000,2357,2977, 731,2468,1116,2014,2051,6139,1261, # 2368
|
||||
1593, 803,2859,2736,3443, 556, 682, 823,1541,6140,1369,2289,1706,2794, 845, 462, # 2384
|
||||
2603,2665,1361, 387, 162,2358,1740, 739,1770,1720,1304,1401,3241,1049, 627,1571, # 2400
|
||||
2427,3526,1877,3942,1852,1500, 431,1910,1503, 677, 297,2795, 286,1433,1038,1198, # 2416
|
||||
2290,1133,1596,4113,4639,2469,1510,1484,3943,6141,2442, 108, 712,4640,2372, 866, # 2432
|
||||
3701,2755,3242,1348, 834,1945,1408,3527,2395,3243,1811, 824, 994,1179,2110,1548, # 2448
|
||||
1453, 790,3003, 690,4324,4325,2832,2909,3820,1860,3821, 225,1748, 310, 346,1780, # 2464
|
||||
2470, 821,1993,2717,2796, 828, 877,3528,2860,2471,1702,2165,2910,2486,1789, 453, # 2480
|
||||
359,2291,1676, 73,1164,1461,1127,3311, 421, 604, 314,1037, 589, 116,2487, 737, # 2496
|
||||
837,1180, 111, 244, 735,6142,2261,1861,1362, 986, 523, 418, 581,2666,3822, 103, # 2512
|
||||
855, 503,1414,1867,2488,1091, 657,1597, 979, 605,1316,4641,1021,2443,2078,2001, # 2528
|
||||
1209, 96, 587,2166,1032, 260,1072,2153, 173, 94, 226,3244, 819,2006,4642,4114, # 2544
|
||||
2203, 231,1744, 782, 97,2667, 786,3387, 887, 391, 442,2219,4326,1425,6143,2694, # 2560
|
||||
633,1544,1202, 483,2015, 592,2052,1958,2472,1655, 419, 129,4327,3444,3312,1714, # 2576
|
||||
1257,3078,4328,1518,1098, 865,1310,1019,1885,1512,1734, 469,2444, 148, 773, 436, # 2592
|
||||
1815,1868,1128,1055,4329,1245,2756,3445,2154,1934,1039,4643, 579,1238, 932,2320, # 2608
|
||||
353, 205, 801, 115,2428, 944,2321,1881, 399,2565,1211, 678, 766,3944, 335,2101, # 2624
|
||||
1459,1781,1402,3945,2737,2131,1010, 844, 981,1326,1013, 550,1816,1545,2620,1335, # 2640
|
||||
1008, 371,2881, 936,1419,1613,3529,1456,1395,2273,1834,2604,1317,2738,2503, 416, # 2656
|
||||
1643,4330, 806,1126, 229, 591,3946,1314,1981,1576,1837,1666, 347,1790, 977,3313, # 2672
|
||||
764,2861,1853, 688,2429,1920,1462, 77, 595, 415,2002,3034, 798,1192,4115,6144, # 2688
|
||||
2978,4331,3035,2695,2582,2072,2566, 430,2430,1727, 842,1396,3947,3702, 613, 377, # 2704
|
||||
278, 236,1417,3388,3314,3174, 757,1869, 107,3530,6145,1194, 623,2262, 207,1253, # 2720
|
||||
2167,3446,3948, 492,1117,1935, 536,1838,2757,1246,4332, 696,2095,2406,1393,1572, # 2736
|
||||
3175,1782, 583, 190, 253,1390,2230, 830,3126,3389, 934,3245,1703,1749,2979,1870, # 2752
|
||||
2545,1656,2204, 869,2346,4116,3176,1817, 496,1764,4644, 942,1504, 404,1903,1122, # 2768
|
||||
1580,3606,2945,1022, 515, 372,1735, 955,2431,3036,6146,2797,1110,2302,2798, 617, # 2784
|
||||
6147, 441, 762,1771,3447,3607,3608,1904, 840,3037, 86, 939,1385, 572,1370,2445, # 2800
|
||||
1336, 114,3703, 898, 294, 203,3315, 703,1583,2274, 429, 961,4333,1854,1951,3390, # 2816
|
||||
2373,3704,4334,1318,1381, 966,1911,2322,1006,1155, 309, 989, 458,2718,1795,1372, # 2832
|
||||
1203, 252,1689,1363,3177, 517,1936, 168,1490, 562, 193,3823,1042,4117,1835, 551, # 2848
|
||||
470,4645, 395, 489,3448,1871,1465,2583,2641, 417,1493, 279,1295, 511,1236,1119, # 2864
|
||||
72,1231,1982,1812,3004, 871,1564, 984,3449,1667,2696,2096,4646,2347,2833,1673, # 2880
|
||||
3609, 695,3246,2668, 807,1183,4647, 890, 388,2333,1801,1457,2911,1765,1477,1031, # 2896
|
||||
3316,3317,1278,3391,2799,2292,2526, 163,3450,4335,2669,1404,1802,6148,2323,2407, # 2912
|
||||
1584,1728,1494,1824,1269, 298, 909,3318,1034,1632, 375, 776,1683,2061, 291, 210, # 2928
|
||||
1123, 809,1249,1002,2642,3038, 206,1011,2132, 144, 975, 882,1565, 342, 667, 754, # 2944
|
||||
1442,2143,1299,2303,2062, 447, 626,2205,1221,2739,2912,1144,1214,2206,2584, 760, # 2960
|
||||
1715, 614, 950,1281,2670,2621, 810, 577,1287,2546,4648, 242,2168, 250,2643, 691, # 2976
|
||||
123,2644, 647, 313,1029, 689,1357,2946,1650, 216, 771,1339,1306, 808,2063, 549, # 2992
|
||||
913,1371,2913,2914,6149,1466,1092,1174,1196,1311,2605,2396,1783,1796,3079, 406, # 3008
|
||||
2671,2117,3949,4649, 487,1825,2220,6150,2915, 448,2348,1073,6151,2397,1707, 130, # 3024
|
||||
900,1598, 329, 176,1959,2527,1620,6152,2275,4336,3319,1983,2191,3705,3610,2155, # 3040
|
||||
3706,1912,1513,1614,6153,1988, 646, 392,2304,1589,3320,3039,1826,1239,1352,1340, # 3056
|
||||
2916, 505,2567,1709,1437,2408,2547, 906,6154,2672, 384,1458,1594,1100,1329, 710, # 3072
|
||||
423,3531,2064,2231,2622,1989,2673,1087,1882, 333, 841,3005,1296,2882,2379, 580, # 3088
|
||||
1937,1827,1293,2585, 601, 574, 249,1772,4118,2079,1120, 645, 901,1176,1690, 795, # 3104
|
||||
2207, 478,1434, 516,1190,1530, 761,2080, 930,1264, 355, 435,1552, 644,1791, 987, # 3120
|
||||
220,1364,1163,1121,1538, 306,2169,1327,1222, 546,2645, 218, 241, 610,1704,3321, # 3136
|
||||
1984,1839,1966,2528, 451,6155,2586,3707,2568, 907,3178, 254,2947, 186,1845,4650, # 3152
|
||||
745, 432,1757, 428,1633, 888,2246,2221,2489,3611,2118,1258,1265, 956,3127,1784, # 3168
|
||||
4337,2490, 319, 510, 119, 457,3612, 274,2035,2007,4651,1409,3128, 970,2758, 590, # 3184
|
||||
2800, 661,2247,4652,2008,3950,1420,1549,3080,3322,3951,1651,1375,2111, 485,2491, # 3200
|
||||
1429,1156,6156,2548,2183,1495, 831,1840,2529,2446, 501,1657, 307,1894,3247,1341, # 3216
|
||||
666, 899,2156,1539,2549,1559, 886, 349,2208,3081,2305,1736,3824,2170,2759,1014, # 3232
|
||||
1913,1386, 542,1397,2948, 490, 368, 716, 362, 159, 282,2569,1129,1658,1288,1750, # 3248
|
||||
2674, 276, 649,2016, 751,1496, 658,1818,1284,1862,2209,2087,2512,3451, 622,2834, # 3264
|
||||
376, 117,1060,2053,1208,1721,1101,1443, 247,1250,3179,1792,3952,2760,2398,3953, # 3280
|
||||
6157,2144,3708, 446,2432,1151,2570,3452,2447,2761,2835,1210,2448,3082, 424,2222, # 3296
|
||||
1251,2449,2119,2836, 504,1581,4338, 602, 817, 857,3825,2349,2306, 357,3826,1470, # 3312
|
||||
1883,2883, 255, 958, 929,2917,3248, 302,4653,1050,1271,1751,2307,1952,1430,2697, # 3328
|
||||
2719,2359, 354,3180, 777, 158,2036,4339,1659,4340,4654,2308,2949,2248,1146,2232, # 3344
|
||||
3532,2720,1696,2623,3827,6158,3129,1550,2698,1485,1297,1428, 637, 931,2721,2145, # 3360
|
||||
914,2550,2587, 81,2450, 612, 827,2646,1242,4655,1118,2884, 472,1855,3181,3533, # 3376
|
||||
3534, 569,1353,2699,1244,1758,2588,4119,2009,2762,2171,3709,1312,1531,6159,1152, # 3392
|
||||
1938, 134,1830, 471,3710,2276,1112,1535,3323,3453,3535, 982,1337,2950, 488, 826, # 3408
|
||||
674,1058,1628,4120,2017, 522,2399, 211, 568,1367,3454, 350, 293,1872,1139,3249, # 3424
|
||||
1399,1946,3006,1300,2360,3324, 588, 736,6160,2606, 744, 669,3536,3828,6161,1358, # 3440
|
||||
199, 723, 848, 933, 851,1939,1505,1514,1338,1618,1831,4656,1634,3613, 443,2740, # 3456
|
||||
3829, 717,1947, 491,1914,6162,2551,1542,4121,1025,6163,1099,1223, 198,3040,2722, # 3472
|
||||
370, 410,1905,2589, 998,1248,3182,2380, 519,1449,4122,1710, 947, 928,1153,4341, # 3488
|
||||
2277, 344,2624,1511, 615, 105, 161,1212,1076,1960,3130,2054,1926,1175,1906,2473, # 3504
|
||||
414,1873,2801,6164,2309, 315,1319,3325, 318,2018,2146,2157, 963, 631, 223,4342, # 3520
|
||||
4343,2675, 479,3711,1197,2625,3712,2676,2361,6165,4344,4123,6166,2451,3183,1886, # 3536
|
||||
2184,1674,1330,1711,1635,1506, 799, 219,3250,3083,3954,1677,3713,3326,2081,3614, # 3552
|
||||
1652,2073,4657,1147,3041,1752, 643,1961, 147,1974,3955,6167,1716,2037, 918,3007, # 3568
|
||||
1994, 120,1537, 118, 609,3184,4345, 740,3455,1219, 332,1615,3830,6168,1621,2980, # 3584
|
||||
1582, 783, 212, 553,2350,3714,1349,2433,2082,4124, 889,6169,2310,1275,1410, 973, # 3600
|
||||
166,1320,3456,1797,1215,3185,2885,1846,2590,2763,4658, 629, 822,3008, 763, 940, # 3616
|
||||
1990,2862, 439,2409,1566,1240,1622, 926,1282,1907,2764, 654,2210,1607, 327,1130, # 3632
|
||||
3956,1678,1623,6170,2434,2192, 686, 608,3831,3715, 903,3957,3042,6171,2741,1522, # 3648
|
||||
1915,1105,1555,2552,1359, 323,3251,4346,3457, 738,1354,2553,2311,2334,1828,2003, # 3664
|
||||
3832,1753,2351,1227,6172,1887,4125,1478,6173,2410,1874,1712,1847, 520,1204,2607, # 3680
|
||||
264,4659, 836,2677,2102, 600,4660,3833,2278,3084,6174,4347,3615,1342, 640, 532, # 3696
|
||||
543,2608,1888,2400,2591,1009,4348,1497, 341,1737,3616,2723,1394, 529,3252,1321, # 3712
|
||||
983,4661,1515,2120, 971,2592, 924, 287,1662,3186,4349,2700,4350,1519, 908,1948, # 3728
|
||||
2452, 156, 796,1629,1486,2223,2055, 694,4126,1259,1036,3392,1213,2249,2742,1889, # 3744
|
||||
1230,3958,1015, 910, 408, 559,3617,4662, 746, 725, 935,4663,3959,3009,1289, 563, # 3760
|
||||
867,4664,3960,1567,2981,2038,2626, 988,2263,2381,4351, 143,2374, 704,1895,6175, # 3776
|
||||
1188,3716,2088, 673,3085,2362,4352, 484,1608,1921,2765,2918, 215, 904,3618,3537, # 3792
|
||||
894, 509, 976,3043,2701,3961,4353,2837,2982, 498,6176,6177,1102,3538,1332,3393, # 3808
|
||||
1487,1636,1637, 233, 245,3962, 383, 650, 995,3044, 460,1520,1206,2352, 749,3327, # 3824
|
||||
530, 700, 389,1438,1560,1773,3963,2264, 719,2951,2724,3834, 870,1832,1644,1000, # 3840
|
||||
839,2474,3717, 197,1630,3394, 365,2886,3964,1285,2133, 734, 922, 818,1106, 732, # 3856
|
||||
480,2083,1774,3458, 923,2279,1350, 221,3086, 85,2233,2234,3835,1585,3010,2147, # 3872
|
||||
1387,1705,2382,1619,2475, 133, 239,2802,1991,1016,2084,2383, 411,2838,1113, 651, # 3888
|
||||
1985,1160,3328, 990,1863,3087,1048,1276,2647, 265,2627,1599,3253,2056, 150, 638, # 3904
|
||||
2019, 656, 853, 326,1479, 680,1439,4354,1001,1759, 413,3459,3395,2492,1431, 459, # 3920
|
||||
4355,1125,3329,2265,1953,1450,2065,2863, 849, 351,2678,3131,3254,3255,1104,1577, # 3936
|
||||
227,1351,1645,2453,2193,1421,2887, 812,2121, 634, 95,2435, 201,2312,4665,1646, # 3952
|
||||
1671,2743,1601,2554,2702,2648,2280,1315,1366,2089,3132,1573,3718,3965,1729,1189, # 3968
|
||||
328,2679,1077,1940,1136, 558,1283, 964,1195, 621,2074,1199,1743,3460,3619,1896, # 3984
|
||||
1916,1890,3836,2952,1154,2112,1064, 862, 378,3011,2066,2113,2803,1568,2839,6178, # 4000
|
||||
3088,2919,1941,1660,2004,1992,2194, 142, 707,1590,1708,1624,1922,1023,1836,1233, # 4016
|
||||
1004,2313, 789, 741,3620,6179,1609,2411,1200,4127,3719,3720,4666,2057,3721, 593, # 4032
|
||||
2840, 367,2920,1878,6180,3461,1521, 628,1168, 692,2211,2649, 300, 720,2067,2571, # 4048
|
||||
2953,3396, 959,2504,3966,3539,3462,1977, 701,6181, 954,1043, 800, 681, 183,3722, # 4064
|
||||
1803,1730,3540,4128,2103, 815,2314, 174, 467, 230,2454,1093,2134, 755,3541,3397, # 4080
|
||||
1141,1162,6182,1738,2039, 270,3256,2513,1005,1647,2185,3837, 858,1679,1897,1719, # 4096
|
||||
2954,2324,1806, 402, 670, 167,4129,1498,2158,2104, 750,6183, 915, 189,1680,1551, # 4112
|
||||
455,4356,1501,2455, 405,1095,2955, 338,1586,1266,1819, 570, 641,1324, 237,1556, # 4128
|
||||
2650,1388,3723,6184,1368,2384,1343,1978,3089,2436, 879,3724, 792,1191, 758,3012, # 4144
|
||||
1411,2135,1322,4357, 240,4667,1848,3725,1574,6185, 420,3045,1546,1391, 714,4358, # 4160
|
||||
1967, 941,1864, 863, 664, 426, 560,1731,2680,1785,2864,1949,2363, 403,3330,1415, # 4176
|
||||
1279,2136,1697,2335, 204, 721,2097,3838, 90,6186,2085,2505, 191,3967, 124,2148, # 4192
|
||||
1376,1798,1178,1107,1898,1405, 860,4359,1243,1272,2375,2983,1558,2456,1638, 113, # 4208
|
||||
3621, 578,1923,2609, 880, 386,4130, 784,2186,2266,1422,2956,2172,1722, 497, 263, # 4224
|
||||
2514,1267,2412,2610, 177,2703,3542, 774,1927,1344, 616,1432,1595,1018, 172,4360, # 4240
|
||||
2325, 911,4361, 438,1468,3622, 794,3968,2024,2173,1681,1829,2957, 945, 895,3090, # 4256
|
||||
575,2212,2476, 475,2401,2681, 785,2744,1745,2293,2555,1975,3133,2865, 394,4668, # 4272
|
||||
3839, 635,4131, 639, 202,1507,2195,2766,1345,1435,2572,3726,1908,1184,1181,2457, # 4288
|
||||
3727,3134,4362, 843,2611, 437, 916,4669, 234, 769,1884,3046,3047,3623, 833,6187, # 4304
|
||||
1639,2250,2402,1355,1185,2010,2047, 999, 525,1732,1290,1488,2612, 948,1578,3728, # 4320
|
||||
2413,2477,1216,2725,2159, 334,3840,1328,3624,2921,1525,4132, 564,1056, 891,4363, # 4336
|
||||
1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352
|
||||
2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512
|
||||
)
|
||||
|
||||
|
||||
233
venv/Lib/site-packages/pip/_vendor/chardet/jpcntx.py
Normal file
233
venv/Lib/site-packages/pip/_vendor/chardet/jpcntx.py
Normal file
@@ -0,0 +1,233 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Communicator client code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
|
||||
# This is hiragana 2-char sequence table, the number in each cell represents its frequency category
|
||||
jp2CharContext = (
|
||||
(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1),
|
||||
(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4),
|
||||
(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2),
|
||||
(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4),
|
||||
(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4),
|
||||
(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3),
|
||||
(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3),
|
||||
(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3),
|
||||
(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4),
|
||||
(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3),
|
||||
(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4),
|
||||
(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3),
|
||||
(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5),
|
||||
(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3),
|
||||
(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5),
|
||||
(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4),
|
||||
(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4),
|
||||
(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3),
|
||||
(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3),
|
||||
(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3),
|
||||
(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5),
|
||||
(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4),
|
||||
(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5),
|
||||
(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3),
|
||||
(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4),
|
||||
(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4),
|
||||
(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4),
|
||||
(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1),
|
||||
(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0),
|
||||
(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3),
|
||||
(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0),
|
||||
(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3),
|
||||
(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3),
|
||||
(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5),
|
||||
(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4),
|
||||
(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5),
|
||||
(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3),
|
||||
(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3),
|
||||
(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3),
|
||||
(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3),
|
||||
(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4),
|
||||
(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4),
|
||||
(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2),
|
||||
(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3),
|
||||
(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3),
|
||||
(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3),
|
||||
(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3),
|
||||
(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4),
|
||||
(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3),
|
||||
(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4),
|
||||
(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3),
|
||||
(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3),
|
||||
(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4),
|
||||
(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4),
|
||||
(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3),
|
||||
(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4),
|
||||
(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4),
|
||||
(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3),
|
||||
(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4),
|
||||
(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4),
|
||||
(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4),
|
||||
(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3),
|
||||
(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2),
|
||||
(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2),
|
||||
(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3),
|
||||
(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3),
|
||||
(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5),
|
||||
(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3),
|
||||
(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4),
|
||||
(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4),
|
||||
(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
|
||||
(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3),
|
||||
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1),
|
||||
(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2),
|
||||
(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3),
|
||||
(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1),
|
||||
)
|
||||
|
||||
class JapaneseContextAnalysis(object):
|
||||
NUM_OF_CATEGORY = 6
|
||||
DONT_KNOW = -1
|
||||
ENOUGH_REL_THRESHOLD = 100
|
||||
MAX_REL_THRESHOLD = 1000
|
||||
MINIMUM_DATA_THRESHOLD = 4
|
||||
|
||||
def __init__(self):
|
||||
self._total_rel = None
|
||||
self._rel_sample = None
|
||||
self._need_to_skip_char_num = None
|
||||
self._last_char_order = None
|
||||
self._done = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._total_rel = 0 # total sequence received
|
||||
# category counters, each integer counts sequence in its category
|
||||
self._rel_sample = [0] * self.NUM_OF_CATEGORY
|
||||
# if last byte in current buffer is not the last byte of a character,
|
||||
# we need to know how many bytes to skip in next buffer
|
||||
self._need_to_skip_char_num = 0
|
||||
self._last_char_order = -1 # The order of previous char
|
||||
# If this flag is set to True, detection is done and conclusion has
|
||||
# been made
|
||||
self._done = False
|
||||
|
||||
def feed(self, byte_str, num_bytes):
|
||||
if self._done:
|
||||
return
|
||||
|
||||
# The buffer we got is byte oriented, and a character may span in more than one
|
||||
# buffers. In case the last one or two byte in last buffer is not
|
||||
# complete, we record how many byte needed to complete that character
|
||||
# and skip these bytes here. We can choose to record those bytes as
|
||||
# well and analyse the character once it is complete, but since a
|
||||
# character will not make much difference, by simply skipping
|
||||
# this character will simply our logic and improve performance.
|
||||
i = self._need_to_skip_char_num
|
||||
while i < num_bytes:
|
||||
order, char_len = self.get_order(byte_str[i:i + 2])
|
||||
i += char_len
|
||||
if i > num_bytes:
|
||||
self._need_to_skip_char_num = i - num_bytes
|
||||
self._last_char_order = -1
|
||||
else:
|
||||
if (order != -1) and (self._last_char_order != -1):
|
||||
self._total_rel += 1
|
||||
if self._total_rel > self.MAX_REL_THRESHOLD:
|
||||
self._done = True
|
||||
break
|
||||
self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1
|
||||
self._last_char_order = order
|
||||
|
||||
def got_enough_data(self):
|
||||
return self._total_rel > self.ENOUGH_REL_THRESHOLD
|
||||
|
||||
def get_confidence(self):
|
||||
# This is just one way to calculate confidence. It works well for me.
|
||||
if self._total_rel > self.MINIMUM_DATA_THRESHOLD:
|
||||
return (self._total_rel - self._rel_sample[0]) / self._total_rel
|
||||
else:
|
||||
return self.DONT_KNOW
|
||||
|
||||
def get_order(self, byte_str):
|
||||
return -1, 1
|
||||
|
||||
class SJISContextAnalysis(JapaneseContextAnalysis):
|
||||
def __init__(self):
|
||||
super(SJISContextAnalysis, self).__init__()
|
||||
self._charset_name = "SHIFT_JIS"
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return self._charset_name
|
||||
|
||||
def get_order(self, byte_str):
|
||||
if not byte_str:
|
||||
return -1, 1
|
||||
# find out current char's byte length
|
||||
first_char = byte_str[0]
|
||||
if (0x81 <= first_char <= 0x9F) or (0xE0 <= first_char <= 0xFC):
|
||||
char_len = 2
|
||||
if (first_char == 0x87) or (0xFA <= first_char <= 0xFC):
|
||||
self._charset_name = "CP932"
|
||||
else:
|
||||
char_len = 1
|
||||
|
||||
# return its order if it is hiragana
|
||||
if len(byte_str) > 1:
|
||||
second_char = byte_str[1]
|
||||
if (first_char == 202) and (0x9F <= second_char <= 0xF1):
|
||||
return second_char - 0x9F, char_len
|
||||
|
||||
return -1, char_len
|
||||
|
||||
class EUCJPContextAnalysis(JapaneseContextAnalysis):
|
||||
def get_order(self, byte_str):
|
||||
if not byte_str:
|
||||
return -1, 1
|
||||
# find out current char's byte length
|
||||
first_char = byte_str[0]
|
||||
if (first_char == 0x8E) or (0xA1 <= first_char <= 0xFE):
|
||||
char_len = 2
|
||||
elif first_char == 0x8F:
|
||||
char_len = 3
|
||||
else:
|
||||
char_len = 1
|
||||
|
||||
# return its order if it is hiragana
|
||||
if len(byte_str) > 1:
|
||||
second_char = byte_str[1]
|
||||
if (first_char == 0xA4) and (0xA1 <= second_char <= 0xF3):
|
||||
return second_char - 0xA1, char_len
|
||||
|
||||
return -1, char_len
|
||||
|
||||
|
||||
4650
venv/Lib/site-packages/pip/_vendor/chardet/langbulgarianmodel.py
Normal file
4650
venv/Lib/site-packages/pip/_vendor/chardet/langbulgarianmodel.py
Normal file
File diff suppressed because it is too large
Load Diff
4398
venv/Lib/site-packages/pip/_vendor/chardet/langgreekmodel.py
Normal file
4398
venv/Lib/site-packages/pip/_vendor/chardet/langgreekmodel.py
Normal file
File diff suppressed because it is too large
Load Diff
4383
venv/Lib/site-packages/pip/_vendor/chardet/langhebrewmodel.py
Normal file
4383
venv/Lib/site-packages/pip/_vendor/chardet/langhebrewmodel.py
Normal file
File diff suppressed because it is too large
Load Diff
4650
venv/Lib/site-packages/pip/_vendor/chardet/langhungarianmodel.py
Normal file
4650
venv/Lib/site-packages/pip/_vendor/chardet/langhungarianmodel.py
Normal file
File diff suppressed because it is too large
Load Diff
5718
venv/Lib/site-packages/pip/_vendor/chardet/langrussianmodel.py
Normal file
5718
venv/Lib/site-packages/pip/_vendor/chardet/langrussianmodel.py
Normal file
File diff suppressed because it is too large
Load Diff
4383
venv/Lib/site-packages/pip/_vendor/chardet/langthaimodel.py
Normal file
4383
venv/Lib/site-packages/pip/_vendor/chardet/langthaimodel.py
Normal file
File diff suppressed because it is too large
Load Diff
4383
venv/Lib/site-packages/pip/_vendor/chardet/langturkishmodel.py
Normal file
4383
venv/Lib/site-packages/pip/_vendor/chardet/langturkishmodel.py
Normal file
File diff suppressed because it is too large
Load Diff
145
venv/Lib/site-packages/pip/_vendor/chardet/latin1prober.py
Normal file
145
venv/Lib/site-packages/pip/_vendor/chardet/latin1prober.py
Normal file
@@ -0,0 +1,145 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState
|
||||
|
||||
FREQ_CAT_NUM = 4
|
||||
|
||||
UDF = 0 # undefined
|
||||
OTH = 1 # other
|
||||
ASC = 2 # ascii capital letter
|
||||
ASS = 3 # ascii small letter
|
||||
ACV = 4 # accent capital vowel
|
||||
ACO = 5 # accent capital other
|
||||
ASV = 6 # accent small vowel
|
||||
ASO = 7 # accent small other
|
||||
CLASS_NUM = 8 # total classes
|
||||
|
||||
Latin1_CharToClass = (
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F
|
||||
OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47
|
||||
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F
|
||||
ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57
|
||||
ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F
|
||||
OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67
|
||||
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F
|
||||
ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77
|
||||
ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F
|
||||
OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87
|
||||
OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F
|
||||
UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97
|
||||
OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7
|
||||
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF
|
||||
ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7
|
||||
ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF
|
||||
ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7
|
||||
ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF
|
||||
ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7
|
||||
ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF
|
||||
ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7
|
||||
ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF
|
||||
)
|
||||
|
||||
# 0 : illegal
|
||||
# 1 : very unlikely
|
||||
# 2 : normal
|
||||
# 3 : very likely
|
||||
Latin1ClassModel = (
|
||||
# UDF OTH ASC ASS ACV ACO ASV ASO
|
||||
0, 0, 0, 0, 0, 0, 0, 0, # UDF
|
||||
0, 3, 3, 3, 3, 3, 3, 3, # OTH
|
||||
0, 3, 3, 3, 3, 3, 3, 3, # ASC
|
||||
0, 3, 3, 3, 1, 1, 3, 3, # ASS
|
||||
0, 3, 3, 3, 1, 2, 1, 2, # ACV
|
||||
0, 3, 3, 3, 3, 3, 3, 3, # ACO
|
||||
0, 3, 1, 3, 1, 1, 1, 3, # ASV
|
||||
0, 3, 1, 3, 1, 1, 3, 3, # ASO
|
||||
)
|
||||
|
||||
|
||||
class Latin1Prober(CharSetProber):
|
||||
def __init__(self):
|
||||
super(Latin1Prober, self).__init__()
|
||||
self._last_char_class = None
|
||||
self._freq_counter = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self._last_char_class = OTH
|
||||
self._freq_counter = [0] * FREQ_CAT_NUM
|
||||
CharSetProber.reset(self)
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "ISO-8859-1"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return ""
|
||||
|
||||
def feed(self, byte_str):
|
||||
byte_str = self.filter_with_english_letters(byte_str)
|
||||
for c in byte_str:
|
||||
char_class = Latin1_CharToClass[c]
|
||||
freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM)
|
||||
+ char_class]
|
||||
if freq == 0:
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
self._freq_counter[freq] += 1
|
||||
self._last_char_class = char_class
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
if self.state == ProbingState.NOT_ME:
|
||||
return 0.01
|
||||
|
||||
total = sum(self._freq_counter)
|
||||
if total < 0.01:
|
||||
confidence = 0.0
|
||||
else:
|
||||
confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0)
|
||||
/ total)
|
||||
if confidence < 0.0:
|
||||
confidence = 0.0
|
||||
# lower the confidence of latin1 so that other more accurate
|
||||
# detector can take priority.
|
||||
confidence = confidence * 0.73
|
||||
return confidence
|
||||
@@ -0,0 +1,91 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
# Proofpoint, Inc.
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState, MachineState
|
||||
|
||||
|
||||
class MultiByteCharSetProber(CharSetProber):
|
||||
"""
|
||||
MultiByteCharSetProber
|
||||
"""
|
||||
|
||||
def __init__(self, lang_filter=None):
|
||||
super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter)
|
||||
self.distribution_analyzer = None
|
||||
self.coding_sm = None
|
||||
self._last_char = [0, 0]
|
||||
|
||||
def reset(self):
|
||||
super(MultiByteCharSetProber, self).reset()
|
||||
if self.coding_sm:
|
||||
self.coding_sm.reset()
|
||||
if self.distribution_analyzer:
|
||||
self.distribution_analyzer.reset()
|
||||
self._last_char = [0, 0]
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
raise NotImplementedError
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def feed(self, byte_str):
|
||||
for i in range(len(byte_str)):
|
||||
coding_state = self.coding_sm.next_state(byte_str[i])
|
||||
if coding_state == MachineState.ERROR:
|
||||
self.logger.debug('%s %s prober hit error at byte %s',
|
||||
self.charset_name, self.language, i)
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif coding_state == MachineState.START:
|
||||
char_len = self.coding_sm.get_current_charlen()
|
||||
if i == 0:
|
||||
self._last_char[1] = byte_str[0]
|
||||
self.distribution_analyzer.feed(self._last_char, char_len)
|
||||
else:
|
||||
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
|
||||
self._last_char[0] = byte_str[-1]
|
||||
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if (self.distribution_analyzer.got_enough_data() and
|
||||
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
return self.distribution_analyzer.get_confidence()
|
||||
@@ -0,0 +1,54 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
# Proofpoint, Inc.
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .utf8prober import UTF8Prober
|
||||
from .sjisprober import SJISProber
|
||||
from .eucjpprober import EUCJPProber
|
||||
from .gb2312prober import GB2312Prober
|
||||
from .euckrprober import EUCKRProber
|
||||
from .cp949prober import CP949Prober
|
||||
from .big5prober import Big5Prober
|
||||
from .euctwprober import EUCTWProber
|
||||
|
||||
|
||||
class MBCSGroupProber(CharSetGroupProber):
|
||||
def __init__(self, lang_filter=None):
|
||||
super(MBCSGroupProber, self).__init__(lang_filter=lang_filter)
|
||||
self.probers = [
|
||||
UTF8Prober(),
|
||||
SJISProber(),
|
||||
EUCJPProber(),
|
||||
GB2312Prober(),
|
||||
EUCKRProber(),
|
||||
CP949Prober(),
|
||||
Big5Prober(),
|
||||
EUCTWProber()
|
||||
]
|
||||
self.reset()
|
||||
572
venv/Lib/site-packages/pip/_vendor/chardet/mbcssm.py
Normal file
572
venv/Lib/site-packages/pip/_vendor/chardet/mbcssm.py
Normal file
@@ -0,0 +1,572 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .enums import MachineState
|
||||
|
||||
# BIG5
|
||||
|
||||
BIG5_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,1, # 78 - 7f
|
||||
4,4,4,4,4,4,4,4, # 80 - 87
|
||||
4,4,4,4,4,4,4,4, # 88 - 8f
|
||||
4,4,4,4,4,4,4,4, # 90 - 97
|
||||
4,4,4,4,4,4,4,4, # 98 - 9f
|
||||
4,3,3,3,3,3,3,3, # a0 - a7
|
||||
3,3,3,3,3,3,3,3, # a8 - af
|
||||
3,3,3,3,3,3,3,3, # b0 - b7
|
||||
3,3,3,3,3,3,3,3, # b8 - bf
|
||||
3,3,3,3,3,3,3,3, # c0 - c7
|
||||
3,3,3,3,3,3,3,3, # c8 - cf
|
||||
3,3,3,3,3,3,3,3, # d0 - d7
|
||||
3,3,3,3,3,3,3,3, # d8 - df
|
||||
3,3,3,3,3,3,3,3, # e0 - e7
|
||||
3,3,3,3,3,3,3,3, # e8 - ef
|
||||
3,3,3,3,3,3,3,3, # f0 - f7
|
||||
3,3,3,3,3,3,3,0 # f8 - ff
|
||||
)
|
||||
|
||||
BIG5_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17
|
||||
)
|
||||
|
||||
BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0)
|
||||
|
||||
BIG5_SM_MODEL = {'class_table': BIG5_CLS,
|
||||
'class_factor': 5,
|
||||
'state_table': BIG5_ST,
|
||||
'char_len_table': BIG5_CHAR_LEN_TABLE,
|
||||
'name': 'Big5'}
|
||||
|
||||
# CP949
|
||||
|
||||
CP949_CLS = (
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f
|
||||
1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f
|
||||
1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f
|
||||
4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f
|
||||
1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f
|
||||
5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f
|
||||
0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f
|
||||
6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f
|
||||
6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af
|
||||
7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf
|
||||
7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef
|
||||
2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff
|
||||
)
|
||||
|
||||
CP949_ST = (
|
||||
#cls= 0 1 2 3 4 5 6 7 8 9 # previous state =
|
||||
MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START, 4, 5,MachineState.ERROR, 6, # MachineState.START
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, # MachineState.ERROR
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 3
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 4
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6
|
||||
)
|
||||
|
||||
CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2)
|
||||
|
||||
CP949_SM_MODEL = {'class_table': CP949_CLS,
|
||||
'class_factor': 10,
|
||||
'state_table': CP949_ST,
|
||||
'char_len_table': CP949_CHAR_LEN_TABLE,
|
||||
'name': 'CP949'}
|
||||
|
||||
# EUC-JP
|
||||
|
||||
EUCJP_CLS = (
|
||||
4,4,4,4,4,4,4,4, # 00 - 07
|
||||
4,4,4,4,4,4,5,5, # 08 - 0f
|
||||
4,4,4,4,4,4,4,4, # 10 - 17
|
||||
4,4,4,5,4,4,4,4, # 18 - 1f
|
||||
4,4,4,4,4,4,4,4, # 20 - 27
|
||||
4,4,4,4,4,4,4,4, # 28 - 2f
|
||||
4,4,4,4,4,4,4,4, # 30 - 37
|
||||
4,4,4,4,4,4,4,4, # 38 - 3f
|
||||
4,4,4,4,4,4,4,4, # 40 - 47
|
||||
4,4,4,4,4,4,4,4, # 48 - 4f
|
||||
4,4,4,4,4,4,4,4, # 50 - 57
|
||||
4,4,4,4,4,4,4,4, # 58 - 5f
|
||||
4,4,4,4,4,4,4,4, # 60 - 67
|
||||
4,4,4,4,4,4,4,4, # 68 - 6f
|
||||
4,4,4,4,4,4,4,4, # 70 - 77
|
||||
4,4,4,4,4,4,4,4, # 78 - 7f
|
||||
5,5,5,5,5,5,5,5, # 80 - 87
|
||||
5,5,5,5,5,5,1,3, # 88 - 8f
|
||||
5,5,5,5,5,5,5,5, # 90 - 97
|
||||
5,5,5,5,5,5,5,5, # 98 - 9f
|
||||
5,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
0,0,0,0,0,0,0,0, # e0 - e7
|
||||
0,0,0,0,0,0,0,0, # e8 - ef
|
||||
0,0,0,0,0,0,0,0, # f0 - f7
|
||||
0,0,0,0,0,0,0,5 # f8 - ff
|
||||
)
|
||||
|
||||
EUCJP_ST = (
|
||||
3, 4, 3, 5,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f
|
||||
3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27
|
||||
)
|
||||
|
||||
EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0)
|
||||
|
||||
EUCJP_SM_MODEL = {'class_table': EUCJP_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': EUCJP_ST,
|
||||
'char_len_table': EUCJP_CHAR_LEN_TABLE,
|
||||
'name': 'EUC-JP'}
|
||||
|
||||
# EUC-KR
|
||||
|
||||
EUCKR_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
1,1,1,1,1,1,1,1, # 40 - 47
|
||||
1,1,1,1,1,1,1,1, # 48 - 4f
|
||||
1,1,1,1,1,1,1,1, # 50 - 57
|
||||
1,1,1,1,1,1,1,1, # 58 - 5f
|
||||
1,1,1,1,1,1,1,1, # 60 - 67
|
||||
1,1,1,1,1,1,1,1, # 68 - 6f
|
||||
1,1,1,1,1,1,1,1, # 70 - 77
|
||||
1,1,1,1,1,1,1,1, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,0,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,3,3,3, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,3,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
2,2,2,2,2,2,2,2, # e0 - e7
|
||||
2,2,2,2,2,2,2,2, # e8 - ef
|
||||
2,2,2,2,2,2,2,2, # f0 - f7
|
||||
2,2,2,2,2,2,2,0 # f8 - ff
|
||||
)
|
||||
|
||||
EUCKR_ST = (
|
||||
MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f
|
||||
)
|
||||
|
||||
EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0)
|
||||
|
||||
EUCKR_SM_MODEL = {'class_table': EUCKR_CLS,
|
||||
'class_factor': 4,
|
||||
'state_table': EUCKR_ST,
|
||||
'char_len_table': EUCKR_CHAR_LEN_TABLE,
|
||||
'name': 'EUC-KR'}
|
||||
|
||||
# EUC-TW
|
||||
|
||||
EUCTW_CLS = (
|
||||
2,2,2,2,2,2,2,2, # 00 - 07
|
||||
2,2,2,2,2,2,0,0, # 08 - 0f
|
||||
2,2,2,2,2,2,2,2, # 10 - 17
|
||||
2,2,2,0,2,2,2,2, # 18 - 1f
|
||||
2,2,2,2,2,2,2,2, # 20 - 27
|
||||
2,2,2,2,2,2,2,2, # 28 - 2f
|
||||
2,2,2,2,2,2,2,2, # 30 - 37
|
||||
2,2,2,2,2,2,2,2, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,2, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,6,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,3,4,4,4,4,4,4, # a0 - a7
|
||||
5,5,1,1,1,1,1,1, # a8 - af
|
||||
1,1,1,1,1,1,1,1, # b0 - b7
|
||||
1,1,1,1,1,1,1,1, # b8 - bf
|
||||
1,1,3,1,3,3,3,3, # c0 - c7
|
||||
3,3,3,3,3,3,3,3, # c8 - cf
|
||||
3,3,3,3,3,3,3,3, # d0 - d7
|
||||
3,3,3,3,3,3,3,3, # d8 - df
|
||||
3,3,3,3,3,3,3,3, # e0 - e7
|
||||
3,3,3,3,3,3,3,3, # e8 - ef
|
||||
3,3,3,3,3,3,3,3, # f0 - f7
|
||||
3,3,3,3,3,3,3,0 # f8 - ff
|
||||
)
|
||||
|
||||
EUCTW_ST = (
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START, 3, 3, 3, 4,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.ERROR,#10-17
|
||||
MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||
5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27
|
||||
MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
|
||||
)
|
||||
|
||||
EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3)
|
||||
|
||||
EUCTW_SM_MODEL = {'class_table': EUCTW_CLS,
|
||||
'class_factor': 7,
|
||||
'state_table': EUCTW_ST,
|
||||
'char_len_table': EUCTW_CHAR_LEN_TABLE,
|
||||
'name': 'x-euc-tw'}
|
||||
|
||||
# GB2312
|
||||
|
||||
GB2312_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
3,3,3,3,3,3,3,3, # 30 - 37
|
||||
3,3,1,1,1,1,1,1, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,4, # 78 - 7f
|
||||
5,6,6,6,6,6,6,6, # 80 - 87
|
||||
6,6,6,6,6,6,6,6, # 88 - 8f
|
||||
6,6,6,6,6,6,6,6, # 90 - 97
|
||||
6,6,6,6,6,6,6,6, # 98 - 9f
|
||||
6,6,6,6,6,6,6,6, # a0 - a7
|
||||
6,6,6,6,6,6,6,6, # a8 - af
|
||||
6,6,6,6,6,6,6,6, # b0 - b7
|
||||
6,6,6,6,6,6,6,6, # b8 - bf
|
||||
6,6,6,6,6,6,6,6, # c0 - c7
|
||||
6,6,6,6,6,6,6,6, # c8 - cf
|
||||
6,6,6,6,6,6,6,6, # d0 - d7
|
||||
6,6,6,6,6,6,6,6, # d8 - df
|
||||
6,6,6,6,6,6,6,6, # e0 - e7
|
||||
6,6,6,6,6,6,6,6, # e8 - ef
|
||||
6,6,6,6,6,6,6,6, # f0 - f7
|
||||
6,6,6,6,6,6,6,0 # f8 - ff
|
||||
)
|
||||
|
||||
GB2312_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, 3,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,#10-17
|
||||
4,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||
MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f
|
||||
)
|
||||
|
||||
# To be accurate, the length of class 6 can be either 2 or 4.
|
||||
# But it is not necessary to discriminate between the two since
|
||||
# it is used for frequency analysis only, and we are validating
|
||||
# each code range there as well. So it is safe to set it to be
|
||||
# 2 here.
|
||||
GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2)
|
||||
|
||||
GB2312_SM_MODEL = {'class_table': GB2312_CLS,
|
||||
'class_factor': 7,
|
||||
'state_table': GB2312_ST,
|
||||
'char_len_table': GB2312_CHAR_LEN_TABLE,
|
||||
'name': 'GB2312'}
|
||||
|
||||
# Shift_JIS
|
||||
|
||||
SJIS_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
2,2,2,2,2,2,2,2, # 40 - 47
|
||||
2,2,2,2,2,2,2,2, # 48 - 4f
|
||||
2,2,2,2,2,2,2,2, # 50 - 57
|
||||
2,2,2,2,2,2,2,2, # 58 - 5f
|
||||
2,2,2,2,2,2,2,2, # 60 - 67
|
||||
2,2,2,2,2,2,2,2, # 68 - 6f
|
||||
2,2,2,2,2,2,2,2, # 70 - 77
|
||||
2,2,2,2,2,2,2,1, # 78 - 7f
|
||||
3,3,3,3,3,2,2,3, # 80 - 87
|
||||
3,3,3,3,3,3,3,3, # 88 - 8f
|
||||
3,3,3,3,3,3,3,3, # 90 - 97
|
||||
3,3,3,3,3,3,3,3, # 98 - 9f
|
||||
#0xa0 is illegal in sjis encoding, but some pages does
|
||||
#contain such byte. We need to be more error forgiven.
|
||||
2,2,2,2,2,2,2,2, # a0 - a7
|
||||
2,2,2,2,2,2,2,2, # a8 - af
|
||||
2,2,2,2,2,2,2,2, # b0 - b7
|
||||
2,2,2,2,2,2,2,2, # b8 - bf
|
||||
2,2,2,2,2,2,2,2, # c0 - c7
|
||||
2,2,2,2,2,2,2,2, # c8 - cf
|
||||
2,2,2,2,2,2,2,2, # d0 - d7
|
||||
2,2,2,2,2,2,2,2, # d8 - df
|
||||
3,3,3,3,3,3,3,3, # e0 - e7
|
||||
3,3,3,3,3,4,4,4, # e8 - ef
|
||||
3,3,3,3,3,3,3,3, # f0 - f7
|
||||
3,3,3,3,3,0,0,0) # f8 - ff
|
||||
|
||||
|
||||
SJIS_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17
|
||||
)
|
||||
|
||||
SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0)
|
||||
|
||||
SJIS_SM_MODEL = {'class_table': SJIS_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': SJIS_ST,
|
||||
'char_len_table': SJIS_CHAR_LEN_TABLE,
|
||||
'name': 'Shift_JIS'}
|
||||
|
||||
# UCS2-BE
|
||||
|
||||
UCS2BE_CLS = (
|
||||
0,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,1,0,0,2,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,3,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,3,3,3,3,3,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,0,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,0,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,0,0,0,0,0,0,0, # a0 - a7
|
||||
0,0,0,0,0,0,0,0, # a8 - af
|
||||
0,0,0,0,0,0,0,0, # b0 - b7
|
||||
0,0,0,0,0,0,0,0, # b8 - bf
|
||||
0,0,0,0,0,0,0,0, # c0 - c7
|
||||
0,0,0,0,0,0,0,0, # c8 - cf
|
||||
0,0,0,0,0,0,0,0, # d0 - d7
|
||||
0,0,0,0,0,0,0,0, # d8 - df
|
||||
0,0,0,0,0,0,0,0, # e0 - e7
|
||||
0,0,0,0,0,0,0,0, # e8 - ef
|
||||
0,0,0,0,0,0,0,0, # f0 - f7
|
||||
0,0,0,0,0,0,4,5 # f8 - ff
|
||||
)
|
||||
|
||||
UCS2BE_ST = (
|
||||
5, 7, 7,MachineState.ERROR, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME, 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||
6, 6, 6, 6, 6,MachineState.ITS_ME, 6, 6,#18-1f
|
||||
6, 6, 6, 6, 5, 7, 7,MachineState.ERROR,#20-27
|
||||
5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f
|
||||
6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37
|
||||
)
|
||||
|
||||
UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2)
|
||||
|
||||
UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': UCS2BE_ST,
|
||||
'char_len_table': UCS2BE_CHAR_LEN_TABLE,
|
||||
'name': 'UTF-16BE'}
|
||||
|
||||
# UCS2-LE
|
||||
|
||||
UCS2LE_CLS = (
|
||||
0,0,0,0,0,0,0,0, # 00 - 07
|
||||
0,0,1,0,0,2,0,0, # 08 - 0f
|
||||
0,0,0,0,0,0,0,0, # 10 - 17
|
||||
0,0,0,3,0,0,0,0, # 18 - 1f
|
||||
0,0,0,0,0,0,0,0, # 20 - 27
|
||||
0,3,3,3,3,3,0,0, # 28 - 2f
|
||||
0,0,0,0,0,0,0,0, # 30 - 37
|
||||
0,0,0,0,0,0,0,0, # 38 - 3f
|
||||
0,0,0,0,0,0,0,0, # 40 - 47
|
||||
0,0,0,0,0,0,0,0, # 48 - 4f
|
||||
0,0,0,0,0,0,0,0, # 50 - 57
|
||||
0,0,0,0,0,0,0,0, # 58 - 5f
|
||||
0,0,0,0,0,0,0,0, # 60 - 67
|
||||
0,0,0,0,0,0,0,0, # 68 - 6f
|
||||
0,0,0,0,0,0,0,0, # 70 - 77
|
||||
0,0,0,0,0,0,0,0, # 78 - 7f
|
||||
0,0,0,0,0,0,0,0, # 80 - 87
|
||||
0,0,0,0,0,0,0,0, # 88 - 8f
|
||||
0,0,0,0,0,0,0,0, # 90 - 97
|
||||
0,0,0,0,0,0,0,0, # 98 - 9f
|
||||
0,0,0,0,0,0,0,0, # a0 - a7
|
||||
0,0,0,0,0,0,0,0, # a8 - af
|
||||
0,0,0,0,0,0,0,0, # b0 - b7
|
||||
0,0,0,0,0,0,0,0, # b8 - bf
|
||||
0,0,0,0,0,0,0,0, # c0 - c7
|
||||
0,0,0,0,0,0,0,0, # c8 - cf
|
||||
0,0,0,0,0,0,0,0, # d0 - d7
|
||||
0,0,0,0,0,0,0,0, # d8 - df
|
||||
0,0,0,0,0,0,0,0, # e0 - e7
|
||||
0,0,0,0,0,0,0,0, # e8 - ef
|
||||
0,0,0,0,0,0,0,0, # f0 - f7
|
||||
0,0,0,0,0,0,4,5 # f8 - ff
|
||||
)
|
||||
|
||||
UCS2LE_ST = (
|
||||
6, 6, 7, 6, 4, 3,MachineState.ERROR,MachineState.ERROR,#00-07
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME, 5, 5, 5,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#10-17
|
||||
5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR, 6, 6,#18-1f
|
||||
7, 6, 8, 8, 5, 5, 5,MachineState.ERROR,#20-27
|
||||
5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f
|
||||
5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37
|
||||
)
|
||||
|
||||
UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2)
|
||||
|
||||
UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS,
|
||||
'class_factor': 6,
|
||||
'state_table': UCS2LE_ST,
|
||||
'char_len_table': UCS2LE_CHAR_LEN_TABLE,
|
||||
'name': 'UTF-16LE'}
|
||||
|
||||
# UTF-8
|
||||
|
||||
UTF8_CLS = (
|
||||
1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value
|
||||
1,1,1,1,1,1,0,0, # 08 - 0f
|
||||
1,1,1,1,1,1,1,1, # 10 - 17
|
||||
1,1,1,0,1,1,1,1, # 18 - 1f
|
||||
1,1,1,1,1,1,1,1, # 20 - 27
|
||||
1,1,1,1,1,1,1,1, # 28 - 2f
|
||||
1,1,1,1,1,1,1,1, # 30 - 37
|
||||
1,1,1,1,1,1,1,1, # 38 - 3f
|
||||
1,1,1,1,1,1,1,1, # 40 - 47
|
||||
1,1,1,1,1,1,1,1, # 48 - 4f
|
||||
1,1,1,1,1,1,1,1, # 50 - 57
|
||||
1,1,1,1,1,1,1,1, # 58 - 5f
|
||||
1,1,1,1,1,1,1,1, # 60 - 67
|
||||
1,1,1,1,1,1,1,1, # 68 - 6f
|
||||
1,1,1,1,1,1,1,1, # 70 - 77
|
||||
1,1,1,1,1,1,1,1, # 78 - 7f
|
||||
2,2,2,2,3,3,3,3, # 80 - 87
|
||||
4,4,4,4,4,4,4,4, # 88 - 8f
|
||||
4,4,4,4,4,4,4,4, # 90 - 97
|
||||
4,4,4,4,4,4,4,4, # 98 - 9f
|
||||
5,5,5,5,5,5,5,5, # a0 - a7
|
||||
5,5,5,5,5,5,5,5, # a8 - af
|
||||
5,5,5,5,5,5,5,5, # b0 - b7
|
||||
5,5,5,5,5,5,5,5, # b8 - bf
|
||||
0,0,6,6,6,6,6,6, # c0 - c7
|
||||
6,6,6,6,6,6,6,6, # c8 - cf
|
||||
6,6,6,6,6,6,6,6, # d0 - d7
|
||||
6,6,6,6,6,6,6,6, # d8 - df
|
||||
7,8,8,8,8,8,8,8, # e0 - e7
|
||||
8,8,8,8,8,9,8,8, # e8 - ef
|
||||
10,11,11,11,11,11,11,11, # f0 - f7
|
||||
12,13,13,13,14,15,0,0 # f8 - ff
|
||||
)
|
||||
|
||||
UTF8_ST = (
|
||||
MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12, 10,#00-07
|
||||
9, 11, 8, 7, 6, 5, 4, 3,#08-0f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#10-17
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#18-1f
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#20-27
|
||||
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#28-2f
|
||||
MachineState.ERROR,MachineState.ERROR, 5, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#30-37
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#38-3f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5, 5,MachineState.ERROR,MachineState.ERROR,#40-47
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#48-4f
|
||||
MachineState.ERROR,MachineState.ERROR, 7, 7, 7, 7,MachineState.ERROR,MachineState.ERROR,#50-57
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#58-5f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 7, 7,MachineState.ERROR,MachineState.ERROR,#60-67
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#68-6f
|
||||
MachineState.ERROR,MachineState.ERROR, 9, 9, 9, 9,MachineState.ERROR,MachineState.ERROR,#70-77
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#78-7f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 9,MachineState.ERROR,MachineState.ERROR,#80-87
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#88-8f
|
||||
MachineState.ERROR,MachineState.ERROR, 12, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,#90-97
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#98-9f
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 12,MachineState.ERROR,MachineState.ERROR,#a0-a7
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#a8-af
|
||||
MachineState.ERROR,MachineState.ERROR, 12, 12, 12,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b0-b7
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#b8-bf
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7
|
||||
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf
|
||||
)
|
||||
|
||||
UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6)
|
||||
|
||||
UTF8_SM_MODEL = {'class_table': UTF8_CLS,
|
||||
'class_factor': 16,
|
||||
'state_table': UTF8_ST,
|
||||
'char_len_table': UTF8_CHAR_LEN_TABLE,
|
||||
'name': 'UTF-8'}
|
||||
310
venv/Lib/site-packages/pip/_vendor/chardet/metadata/languages.py
Normal file
310
venv/Lib/site-packages/pip/_vendor/chardet/metadata/languages.py
Normal file
@@ -0,0 +1,310 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Metadata about languages used by our model training code for our
|
||||
SingleByteCharSetProbers. Could be used for other things in the future.
|
||||
|
||||
This code is based on the language metadata from the uchardet project.
|
||||
"""
|
||||
from __future__ import absolute_import, print_function
|
||||
|
||||
from string import ascii_letters
|
||||
|
||||
|
||||
# TODO: Add Ukranian (KOI8-U)
|
||||
|
||||
class Language(object):
|
||||
"""Metadata about a language useful for training models
|
||||
|
||||
:ivar name: The human name for the language, in English.
|
||||
:type name: str
|
||||
:ivar iso_code: 2-letter ISO 639-1 if possible, 3-letter ISO code otherwise,
|
||||
or use another catalog as a last resort.
|
||||
:type iso_code: str
|
||||
:ivar use_ascii: Whether or not ASCII letters should be included in trained
|
||||
models.
|
||||
:type use_ascii: bool
|
||||
:ivar charsets: The charsets we want to support and create data for.
|
||||
:type charsets: list of str
|
||||
:ivar alphabet: The characters in the language's alphabet. If `use_ascii` is
|
||||
`True`, you only need to add those not in the ASCII set.
|
||||
:type alphabet: str
|
||||
:ivar wiki_start_pages: The Wikipedia pages to start from if we're crawling
|
||||
Wikipedia for training data.
|
||||
:type wiki_start_pages: list of str
|
||||
"""
|
||||
def __init__(self, name=None, iso_code=None, use_ascii=True, charsets=None,
|
||||
alphabet=None, wiki_start_pages=None):
|
||||
super(Language, self).__init__()
|
||||
self.name = name
|
||||
self.iso_code = iso_code
|
||||
self.use_ascii = use_ascii
|
||||
self.charsets = charsets
|
||||
if self.use_ascii:
|
||||
if alphabet:
|
||||
alphabet += ascii_letters
|
||||
else:
|
||||
alphabet = ascii_letters
|
||||
elif not alphabet:
|
||||
raise ValueError('Must supply alphabet if use_ascii is False')
|
||||
self.alphabet = ''.join(sorted(set(alphabet))) if alphabet else None
|
||||
self.wiki_start_pages = wiki_start_pages
|
||||
|
||||
def __repr__(self):
|
||||
return '{}({})'.format(self.__class__.__name__,
|
||||
', '.join('{}={!r}'.format(k, v)
|
||||
for k, v in self.__dict__.items()
|
||||
if not k.startswith('_')))
|
||||
|
||||
|
||||
LANGUAGES = {'Arabic': Language(name='Arabic',
|
||||
iso_code='ar',
|
||||
use_ascii=False,
|
||||
# We only support encodings that use isolated
|
||||
# forms, because the current recommendation is
|
||||
# that the rendering system handles presentation
|
||||
# forms. This means we purposefully skip IBM864.
|
||||
charsets=['ISO-8859-6', 'WINDOWS-1256',
|
||||
'CP720', 'CP864'],
|
||||
alphabet=u'ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿـفقكلمنهوىيًٌٍَُِّ',
|
||||
wiki_start_pages=[u'الصفحة_الرئيسية']),
|
||||
'Belarusian': Language(name='Belarusian',
|
||||
iso_code='be',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-5', 'WINDOWS-1251',
|
||||
'IBM866', 'MacCyrillic'],
|
||||
alphabet=(u'АБВГДЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЫЬЭЮЯ'
|
||||
u'абвгдеёжзійклмнопрстуўфхцчшыьэюяʼ'),
|
||||
wiki_start_pages=[u'Галоўная_старонка']),
|
||||
'Bulgarian': Language(name='Bulgarian',
|
||||
iso_code='bg',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-5', 'WINDOWS-1251',
|
||||
'IBM855'],
|
||||
alphabet=(u'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯ'
|
||||
u'абвгдежзийклмнопрстуфхцчшщъьюя'),
|
||||
wiki_start_pages=[u'Начална_страница']),
|
||||
'Czech': Language(name='Czech',
|
||||
iso_code='cz',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-2', 'WINDOWS-1250'],
|
||||
alphabet=u'áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ',
|
||||
wiki_start_pages=[u'Hlavní_strana']),
|
||||
'Danish': Language(name='Danish',
|
||||
iso_code='da',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'ISO-8859-15',
|
||||
'WINDOWS-1252'],
|
||||
alphabet=u'æøåÆØÅ',
|
||||
wiki_start_pages=[u'Forside']),
|
||||
'German': Language(name='German',
|
||||
iso_code='de',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'WINDOWS-1252'],
|
||||
alphabet=u'äöüßÄÖÜ',
|
||||
wiki_start_pages=[u'Wikipedia:Hauptseite']),
|
||||
'Greek': Language(name='Greek',
|
||||
iso_code='el',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-7', 'WINDOWS-1253'],
|
||||
alphabet=(u'αβγδεζηθικλμνξοπρσςτυφχψωάέήίόύώ'
|
||||
u'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΣΤΥΦΧΨΩΆΈΉΊΌΎΏ'),
|
||||
wiki_start_pages=[u'Πύλη:Κύρια']),
|
||||
'English': Language(name='English',
|
||||
iso_code='en',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'WINDOWS-1252'],
|
||||
wiki_start_pages=[u'Main_Page']),
|
||||
'Esperanto': Language(name='Esperanto',
|
||||
iso_code='eo',
|
||||
# Q, W, X, and Y not used at all
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-3'],
|
||||
alphabet=(u'abcĉdefgĝhĥijĵklmnoprsŝtuŭvz'
|
||||
u'ABCĈDEFGĜHĤIJĴKLMNOPRSŜTUŬVZ'),
|
||||
wiki_start_pages=[u'Vikipedio:Ĉefpaĝo']),
|
||||
'Spanish': Language(name='Spanish',
|
||||
iso_code='es',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'ISO-8859-15',
|
||||
'WINDOWS-1252'],
|
||||
alphabet=u'ñáéíóúüÑÁÉÍÓÚÜ',
|
||||
wiki_start_pages=[u'Wikipedia:Portada']),
|
||||
'Estonian': Language(name='Estonian',
|
||||
iso_code='et',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-4', 'ISO-8859-13',
|
||||
'WINDOWS-1257'],
|
||||
# C, F, Š, Q, W, X, Y, Z, Ž are only for
|
||||
# loanwords
|
||||
alphabet=(u'ABDEGHIJKLMNOPRSTUVÕÄÖÜ'
|
||||
u'abdeghijklmnoprstuvõäöü'),
|
||||
wiki_start_pages=[u'Esileht']),
|
||||
'Finnish': Language(name='Finnish',
|
||||
iso_code='fi',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'ISO-8859-15',
|
||||
'WINDOWS-1252'],
|
||||
alphabet=u'ÅÄÖŠŽåäöšž',
|
||||
wiki_start_pages=[u'Wikipedia:Etusivu']),
|
||||
'French': Language(name='French',
|
||||
iso_code='fr',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'ISO-8859-15',
|
||||
'WINDOWS-1252'],
|
||||
alphabet=u'œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ',
|
||||
wiki_start_pages=[u'Wikipédia:Accueil_principal',
|
||||
u'Bœuf (animal)']),
|
||||
'Hebrew': Language(name='Hebrew',
|
||||
iso_code='he',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-8', 'WINDOWS-1255'],
|
||||
alphabet=u'אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ',
|
||||
wiki_start_pages=[u'עמוד_ראשי']),
|
||||
'Croatian': Language(name='Croatian',
|
||||
iso_code='hr',
|
||||
# Q, W, X, Y are only used for foreign words.
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-2', 'WINDOWS-1250'],
|
||||
alphabet=(u'abcčćdđefghijklmnoprsštuvzž'
|
||||
u'ABCČĆDĐEFGHIJKLMNOPRSŠTUVZŽ'),
|
||||
wiki_start_pages=[u'Glavna_stranica']),
|
||||
'Hungarian': Language(name='Hungarian',
|
||||
iso_code='hu',
|
||||
# Q, W, X, Y are only used for foreign words.
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-2', 'WINDOWS-1250'],
|
||||
alphabet=(u'abcdefghijklmnoprstuvzáéíóöőúüű'
|
||||
u'ABCDEFGHIJKLMNOPRSTUVZÁÉÍÓÖŐÚÜŰ'),
|
||||
wiki_start_pages=[u'Kezdőlap']),
|
||||
'Italian': Language(name='Italian',
|
||||
iso_code='it',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'ISO-8859-15',
|
||||
'WINDOWS-1252'],
|
||||
alphabet=u'ÀÈÉÌÒÓÙàèéìòóù',
|
||||
wiki_start_pages=[u'Pagina_principale']),
|
||||
'Lithuanian': Language(name='Lithuanian',
|
||||
iso_code='lt',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-13', 'WINDOWS-1257',
|
||||
'ISO-8859-4'],
|
||||
# Q, W, and X not used at all
|
||||
alphabet=(u'AĄBCČDEĘĖFGHIĮYJKLMNOPRSŠTUŲŪVZŽ'
|
||||
u'aąbcčdeęėfghiįyjklmnoprsštuųūvzž'),
|
||||
wiki_start_pages=[u'Pagrindinis_puslapis']),
|
||||
'Latvian': Language(name='Latvian',
|
||||
iso_code='lv',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-13', 'WINDOWS-1257',
|
||||
'ISO-8859-4'],
|
||||
# Q, W, X, Y are only for loanwords
|
||||
alphabet=(u'AĀBCČDEĒFGĢHIĪJKĶLĻMNŅOPRSŠTUŪVZŽ'
|
||||
u'aābcčdeēfgģhiījkķlļmnņoprsštuūvzž'),
|
||||
wiki_start_pages=[u'Sākumlapa']),
|
||||
'Macedonian': Language(name='Macedonian',
|
||||
iso_code='mk',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-5', 'WINDOWS-1251',
|
||||
'MacCyrillic', 'IBM855'],
|
||||
alphabet=(u'АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШ'
|
||||
u'абвгдѓежзѕијклљмнњопрстќуфхцчџш'),
|
||||
wiki_start_pages=[u'Главна_страница']),
|
||||
'Dutch': Language(name='Dutch',
|
||||
iso_code='nl',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'WINDOWS-1252'],
|
||||
wiki_start_pages=[u'Hoofdpagina']),
|
||||
'Polish': Language(name='Polish',
|
||||
iso_code='pl',
|
||||
# Q and X are only used for foreign words.
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-2', 'WINDOWS-1250'],
|
||||
alphabet=(u'AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ'
|
||||
u'aąbcćdeęfghijklłmnńoóprsśtuwyzźż'),
|
||||
wiki_start_pages=[u'Wikipedia:Strona_główna']),
|
||||
'Portuguese': Language(name='Portuguese',
|
||||
iso_code='pt',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-1', 'ISO-8859-15',
|
||||
'WINDOWS-1252'],
|
||||
alphabet=u'ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú',
|
||||
wiki_start_pages=[u'Wikipédia:Página_principal']),
|
||||
'Romanian': Language(name='Romanian',
|
||||
iso_code='ro',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-2', 'WINDOWS-1250'],
|
||||
alphabet=u'ăâîșțĂÂÎȘȚ',
|
||||
wiki_start_pages=[u'Pagina_principală']),
|
||||
'Russian': Language(name='Russian',
|
||||
iso_code='ru',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-5', 'WINDOWS-1251',
|
||||
'KOI8-R', 'MacCyrillic', 'IBM866',
|
||||
'IBM855'],
|
||||
alphabet=(u'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'
|
||||
u'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'),
|
||||
wiki_start_pages=[u'Заглавная_страница']),
|
||||
'Slovak': Language(name='Slovak',
|
||||
iso_code='sk',
|
||||
use_ascii=True,
|
||||
charsets=['ISO-8859-2', 'WINDOWS-1250'],
|
||||
alphabet=u'áäčďéíĺľňóôŕšťúýžÁÄČĎÉÍĹĽŇÓÔŔŠŤÚÝŽ',
|
||||
wiki_start_pages=[u'Hlavná_stránka']),
|
||||
'Slovene': Language(name='Slovene',
|
||||
iso_code='sl',
|
||||
# Q, W, X, Y are only used for foreign words.
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-2', 'WINDOWS-1250'],
|
||||
alphabet=(u'abcčdefghijklmnoprsštuvzž'
|
||||
u'ABCČDEFGHIJKLMNOPRSŠTUVZŽ'),
|
||||
wiki_start_pages=[u'Glavna_stran']),
|
||||
# Serbian can be written in both Latin and Cyrillic, but there's no
|
||||
# simple way to get the Latin alphabet pages from Wikipedia through
|
||||
# the API, so for now we just support Cyrillic.
|
||||
'Serbian': Language(name='Serbian',
|
||||
iso_code='sr',
|
||||
alphabet=(u'АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШ'
|
||||
u'абвгдђежзијклљмнњопрстћуфхцчџш'),
|
||||
charsets=['ISO-8859-5', 'WINDOWS-1251',
|
||||
'MacCyrillic', 'IBM855'],
|
||||
wiki_start_pages=[u'Главна_страна']),
|
||||
'Thai': Language(name='Thai',
|
||||
iso_code='th',
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-11', 'TIS-620', 'CP874'],
|
||||
alphabet=u'กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛',
|
||||
wiki_start_pages=[u'หน้าหลัก']),
|
||||
'Turkish': Language(name='Turkish',
|
||||
iso_code='tr',
|
||||
# Q, W, and X are not used by Turkish
|
||||
use_ascii=False,
|
||||
charsets=['ISO-8859-3', 'ISO-8859-9',
|
||||
'WINDOWS-1254'],
|
||||
alphabet=(u'abcçdefgğhıijklmnoöprsştuüvyzâîû'
|
||||
u'ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZÂÎÛ'),
|
||||
wiki_start_pages=[u'Ana_Sayfa']),
|
||||
'Vietnamese': Language(name='Vietnamese',
|
||||
iso_code='vi',
|
||||
use_ascii=False,
|
||||
# Windows-1258 is the only common 8-bit
|
||||
# Vietnamese encoding supported by Python.
|
||||
# From Wikipedia:
|
||||
# For systems that lack support for Unicode,
|
||||
# dozens of 8-bit Vietnamese code pages are
|
||||
# available.[1] The most common are VISCII
|
||||
# (TCVN 5712:1993), VPS, and Windows-1258.[3]
|
||||
# Where ASCII is required, such as when
|
||||
# ensuring readability in plain text e-mail,
|
||||
# Vietnamese letters are often encoded
|
||||
# according to Vietnamese Quoted-Readable
|
||||
# (VIQR) or VSCII Mnemonic (VSCII-MNEM),[4]
|
||||
# though usage of either variable-width
|
||||
# scheme has declined dramatically following
|
||||
# the adoption of Unicode on the World Wide
|
||||
# Web.
|
||||
charsets=['WINDOWS-1258'],
|
||||
alphabet=(u'aăâbcdđeêghiklmnoôơpqrstuưvxy'
|
||||
u'AĂÂBCDĐEÊGHIKLMNOÔƠPQRSTUƯVXY'),
|
||||
wiki_start_pages=[u'Chữ_Quốc_ngữ']),
|
||||
}
|
||||
145
venv/Lib/site-packages/pip/_vendor/chardet/sbcharsetprober.py
Normal file
145
venv/Lib/site-packages/pip/_vendor/chardet/sbcharsetprober.py
Normal file
@@ -0,0 +1,145 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from collections import namedtuple
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import CharacterCategory, ProbingState, SequenceLikelihood
|
||||
|
||||
|
||||
SingleByteCharSetModel = namedtuple('SingleByteCharSetModel',
|
||||
['charset_name',
|
||||
'language',
|
||||
'char_to_order_map',
|
||||
'language_model',
|
||||
'typical_positive_ratio',
|
||||
'keep_ascii_letters',
|
||||
'alphabet'])
|
||||
|
||||
|
||||
class SingleByteCharSetProber(CharSetProber):
|
||||
SAMPLE_SIZE = 64
|
||||
SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2
|
||||
POSITIVE_SHORTCUT_THRESHOLD = 0.95
|
||||
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
|
||||
|
||||
def __init__(self, model, reversed=False, name_prober=None):
|
||||
super(SingleByteCharSetProber, self).__init__()
|
||||
self._model = model
|
||||
# TRUE if we need to reverse every pair in the model lookup
|
||||
self._reversed = reversed
|
||||
# Optional auxiliary prober for name decision
|
||||
self._name_prober = name_prober
|
||||
self._last_order = None
|
||||
self._seq_counters = None
|
||||
self._total_seqs = None
|
||||
self._total_char = None
|
||||
self._freq_char = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
super(SingleByteCharSetProber, self).reset()
|
||||
# char order of last character
|
||||
self._last_order = 255
|
||||
self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
|
||||
self._total_seqs = 0
|
||||
self._total_char = 0
|
||||
# characters that fall in our sampling range
|
||||
self._freq_char = 0
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
if self._name_prober:
|
||||
return self._name_prober.charset_name
|
||||
else:
|
||||
return self._model.charset_name
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
if self._name_prober:
|
||||
return self._name_prober.language
|
||||
else:
|
||||
return self._model.language
|
||||
|
||||
def feed(self, byte_str):
|
||||
# TODO: Make filter_international_words keep things in self.alphabet
|
||||
if not self._model.keep_ascii_letters:
|
||||
byte_str = self.filter_international_words(byte_str)
|
||||
if not byte_str:
|
||||
return self.state
|
||||
char_to_order_map = self._model.char_to_order_map
|
||||
language_model = self._model.language_model
|
||||
for char in byte_str:
|
||||
order = char_to_order_map.get(char, CharacterCategory.UNDEFINED)
|
||||
# XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but
|
||||
# CharacterCategory.SYMBOL is actually 253, so we use CONTROL
|
||||
# to make it closer to the original intent. The only difference
|
||||
# is whether or not we count digits and control characters for
|
||||
# _total_char purposes.
|
||||
if order < CharacterCategory.CONTROL:
|
||||
self._total_char += 1
|
||||
# TODO: Follow uchardet's lead and discount confidence for frequent
|
||||
# control characters.
|
||||
# See https://github.com/BYVoid/uchardet/commit/55b4f23971db61
|
||||
if order < self.SAMPLE_SIZE:
|
||||
self._freq_char += 1
|
||||
if self._last_order < self.SAMPLE_SIZE:
|
||||
self._total_seqs += 1
|
||||
if not self._reversed:
|
||||
lm_cat = language_model[self._last_order][order]
|
||||
else:
|
||||
lm_cat = language_model[order][self._last_order]
|
||||
self._seq_counters[lm_cat] += 1
|
||||
self._last_order = order
|
||||
|
||||
charset_name = self._model.charset_name
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
|
||||
confidence = self.get_confidence()
|
||||
if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
|
||||
self.logger.debug('%s confidence = %s, we have a winner',
|
||||
charset_name, confidence)
|
||||
self._state = ProbingState.FOUND_IT
|
||||
elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
|
||||
self.logger.debug('%s confidence = %s, below negative '
|
||||
'shortcut threshhold %s', charset_name,
|
||||
confidence,
|
||||
self.NEGATIVE_SHORTCUT_THRESHOLD)
|
||||
self._state = ProbingState.NOT_ME
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
r = 0.01
|
||||
if self._total_seqs > 0:
|
||||
r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
|
||||
self._total_seqs / self._model.typical_positive_ratio)
|
||||
r = r * self._freq_char / self._total_char
|
||||
if r >= 1.0:
|
||||
r = 0.99
|
||||
return r
|
||||
@@ -0,0 +1,83 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .hebrewprober import HebrewProber
|
||||
from .langbulgarianmodel import (ISO_8859_5_BULGARIAN_MODEL,
|
||||
WINDOWS_1251_BULGARIAN_MODEL)
|
||||
from .langgreekmodel import ISO_8859_7_GREEK_MODEL, WINDOWS_1253_GREEK_MODEL
|
||||
from .langhebrewmodel import WINDOWS_1255_HEBREW_MODEL
|
||||
# from .langhungarianmodel import (ISO_8859_2_HUNGARIAN_MODEL,
|
||||
# WINDOWS_1250_HUNGARIAN_MODEL)
|
||||
from .langrussianmodel import (IBM855_RUSSIAN_MODEL, IBM866_RUSSIAN_MODEL,
|
||||
ISO_8859_5_RUSSIAN_MODEL, KOI8_R_RUSSIAN_MODEL,
|
||||
MACCYRILLIC_RUSSIAN_MODEL,
|
||||
WINDOWS_1251_RUSSIAN_MODEL)
|
||||
from .langthaimodel import TIS_620_THAI_MODEL
|
||||
from .langturkishmodel import ISO_8859_9_TURKISH_MODEL
|
||||
from .sbcharsetprober import SingleByteCharSetProber
|
||||
|
||||
|
||||
class SBCSGroupProber(CharSetGroupProber):
|
||||
def __init__(self):
|
||||
super(SBCSGroupProber, self).__init__()
|
||||
hebrew_prober = HebrewProber()
|
||||
logical_hebrew_prober = SingleByteCharSetProber(WINDOWS_1255_HEBREW_MODEL,
|
||||
False, hebrew_prober)
|
||||
# TODO: See if using ISO-8859-8 Hebrew model works better here, since
|
||||
# it's actually the visual one
|
||||
visual_hebrew_prober = SingleByteCharSetProber(WINDOWS_1255_HEBREW_MODEL,
|
||||
True, hebrew_prober)
|
||||
hebrew_prober.set_model_probers(logical_hebrew_prober,
|
||||
visual_hebrew_prober)
|
||||
# TODO: ORDER MATTERS HERE. I changed the order vs what was in master
|
||||
# and several tests failed that did not before. Some thought
|
||||
# should be put into the ordering, and we should consider making
|
||||
# order not matter here, because that is very counter-intuitive.
|
||||
self.probers = [
|
||||
SingleByteCharSetProber(WINDOWS_1251_RUSSIAN_MODEL),
|
||||
SingleByteCharSetProber(KOI8_R_RUSSIAN_MODEL),
|
||||
SingleByteCharSetProber(ISO_8859_5_RUSSIAN_MODEL),
|
||||
SingleByteCharSetProber(MACCYRILLIC_RUSSIAN_MODEL),
|
||||
SingleByteCharSetProber(IBM866_RUSSIAN_MODEL),
|
||||
SingleByteCharSetProber(IBM855_RUSSIAN_MODEL),
|
||||
SingleByteCharSetProber(ISO_8859_7_GREEK_MODEL),
|
||||
SingleByteCharSetProber(WINDOWS_1253_GREEK_MODEL),
|
||||
SingleByteCharSetProber(ISO_8859_5_BULGARIAN_MODEL),
|
||||
SingleByteCharSetProber(WINDOWS_1251_BULGARIAN_MODEL),
|
||||
# TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
|
||||
# after we retrain model.
|
||||
# SingleByteCharSetProber(ISO_8859_2_HUNGARIAN_MODEL),
|
||||
# SingleByteCharSetProber(WINDOWS_1250_HUNGARIAN_MODEL),
|
||||
SingleByteCharSetProber(TIS_620_THAI_MODEL),
|
||||
SingleByteCharSetProber(ISO_8859_9_TURKISH_MODEL),
|
||||
hebrew_prober,
|
||||
logical_hebrew_prober,
|
||||
visual_hebrew_prober,
|
||||
]
|
||||
self.reset()
|
||||
92
venv/Lib/site-packages/pip/_vendor/chardet/sjisprober.py
Normal file
92
venv/Lib/site-packages/pip/_vendor/chardet/sjisprober.py
Normal file
@@ -0,0 +1,92 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .mbcharsetprober import MultiByteCharSetProber
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .chardistribution import SJISDistributionAnalysis
|
||||
from .jpcntx import SJISContextAnalysis
|
||||
from .mbcssm import SJIS_SM_MODEL
|
||||
from .enums import ProbingState, MachineState
|
||||
|
||||
|
||||
class SJISProber(MultiByteCharSetProber):
|
||||
def __init__(self):
|
||||
super(SJISProber, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
|
||||
self.distribution_analyzer = SJISDistributionAnalysis()
|
||||
self.context_analyzer = SJISContextAnalysis()
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
super(SJISProber, self).reset()
|
||||
self.context_analyzer.reset()
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return self.context_analyzer.charset_name
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return "Japanese"
|
||||
|
||||
def feed(self, byte_str):
|
||||
for i in range(len(byte_str)):
|
||||
coding_state = self.coding_sm.next_state(byte_str[i])
|
||||
if coding_state == MachineState.ERROR:
|
||||
self.logger.debug('%s %s prober hit error at byte %s',
|
||||
self.charset_name, self.language, i)
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif coding_state == MachineState.START:
|
||||
char_len = self.coding_sm.get_current_charlen()
|
||||
if i == 0:
|
||||
self._last_char[1] = byte_str[0]
|
||||
self.context_analyzer.feed(self._last_char[2 - char_len:],
|
||||
char_len)
|
||||
self.distribution_analyzer.feed(self._last_char, char_len)
|
||||
else:
|
||||
self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3
|
||||
- char_len], char_len)
|
||||
self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
|
||||
char_len)
|
||||
|
||||
self._last_char[0] = byte_str[-1]
|
||||
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if (self.context_analyzer.got_enough_data() and
|
||||
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
context_conf = self.context_analyzer.get_confidence()
|
||||
distrib_conf = self.distribution_analyzer.get_confidence()
|
||||
return max(context_conf, distrib_conf)
|
||||
286
venv/Lib/site-packages/pip/_vendor/chardet/universaldetector.py
Normal file
286
venv/Lib/site-packages/pip/_vendor/chardet/universaldetector.py
Normal file
@@ -0,0 +1,286 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is Mozilla Universal charset detector code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 2001
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
# Shy Shalom - original C code
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
"""
|
||||
Module containing the UniversalDetector detector class, which is the primary
|
||||
class a user of ``chardet`` should use.
|
||||
|
||||
:author: Mark Pilgrim (initial port to Python)
|
||||
:author: Shy Shalom (original C code)
|
||||
:author: Dan Blanchard (major refactoring for 3.0)
|
||||
:author: Ian Cordasco
|
||||
"""
|
||||
|
||||
|
||||
import codecs
|
||||
import logging
|
||||
import re
|
||||
|
||||
from .charsetgroupprober import CharSetGroupProber
|
||||
from .enums import InputState, LanguageFilter, ProbingState
|
||||
from .escprober import EscCharSetProber
|
||||
from .latin1prober import Latin1Prober
|
||||
from .mbcsgroupprober import MBCSGroupProber
|
||||
from .sbcsgroupprober import SBCSGroupProber
|
||||
|
||||
|
||||
class UniversalDetector(object):
|
||||
"""
|
||||
The ``UniversalDetector`` class underlies the ``chardet.detect`` function
|
||||
and coordinates all of the different charset probers.
|
||||
|
||||
To get a ``dict`` containing an encoding and its confidence, you can simply
|
||||
run:
|
||||
|
||||
.. code::
|
||||
|
||||
u = UniversalDetector()
|
||||
u.feed(some_bytes)
|
||||
u.close()
|
||||
detected = u.result
|
||||
|
||||
"""
|
||||
|
||||
MINIMUM_THRESHOLD = 0.20
|
||||
HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]')
|
||||
ESC_DETECTOR = re.compile(b'(\033|~{)')
|
||||
WIN_BYTE_DETECTOR = re.compile(b'[\x80-\x9F]')
|
||||
ISO_WIN_MAP = {'iso-8859-1': 'Windows-1252',
|
||||
'iso-8859-2': 'Windows-1250',
|
||||
'iso-8859-5': 'Windows-1251',
|
||||
'iso-8859-6': 'Windows-1256',
|
||||
'iso-8859-7': 'Windows-1253',
|
||||
'iso-8859-8': 'Windows-1255',
|
||||
'iso-8859-9': 'Windows-1254',
|
||||
'iso-8859-13': 'Windows-1257'}
|
||||
|
||||
def __init__(self, lang_filter=LanguageFilter.ALL):
|
||||
self._esc_charset_prober = None
|
||||
self._charset_probers = []
|
||||
self.result = None
|
||||
self.done = None
|
||||
self._got_data = None
|
||||
self._input_state = None
|
||||
self._last_char = None
|
||||
self.lang_filter = lang_filter
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self._has_win_bytes = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
Reset the UniversalDetector and all of its probers back to their
|
||||
initial states. This is called by ``__init__``, so you only need to
|
||||
call this directly in between analyses of different documents.
|
||||
"""
|
||||
self.result = {'encoding': None, 'confidence': 0.0, 'language': None}
|
||||
self.done = False
|
||||
self._got_data = False
|
||||
self._has_win_bytes = False
|
||||
self._input_state = InputState.PURE_ASCII
|
||||
self._last_char = b''
|
||||
if self._esc_charset_prober:
|
||||
self._esc_charset_prober.reset()
|
||||
for prober in self._charset_probers:
|
||||
prober.reset()
|
||||
|
||||
def feed(self, byte_str):
|
||||
"""
|
||||
Takes a chunk of a document and feeds it through all of the relevant
|
||||
charset probers.
|
||||
|
||||
After calling ``feed``, you can check the value of the ``done``
|
||||
attribute to see if you need to continue feeding the
|
||||
``UniversalDetector`` more data, or if it has made a prediction
|
||||
(in the ``result`` attribute).
|
||||
|
||||
.. note::
|
||||
You should always call ``close`` when you're done feeding in your
|
||||
document if ``done`` is not already ``True``.
|
||||
"""
|
||||
if self.done:
|
||||
return
|
||||
|
||||
if not len(byte_str):
|
||||
return
|
||||
|
||||
if not isinstance(byte_str, bytearray):
|
||||
byte_str = bytearray(byte_str)
|
||||
|
||||
# First check for known BOMs, since these are guaranteed to be correct
|
||||
if not self._got_data:
|
||||
# If the data starts with BOM, we know it is UTF
|
||||
if byte_str.startswith(codecs.BOM_UTF8):
|
||||
# EF BB BF UTF-8 with BOM
|
||||
self.result = {'encoding': "UTF-8-SIG",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith((codecs.BOM_UTF32_LE,
|
||||
codecs.BOM_UTF32_BE)):
|
||||
# FF FE 00 00 UTF-32, little-endian BOM
|
||||
# 00 00 FE FF UTF-32, big-endian BOM
|
||||
self.result = {'encoding': "UTF-32",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith(b'\xFE\xFF\x00\x00'):
|
||||
# FE FF 00 00 UCS-4, unusual octet order BOM (3412)
|
||||
self.result = {'encoding': "X-ISO-10646-UCS-4-3412",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith(b'\x00\x00\xFF\xFE'):
|
||||
# 00 00 FF FE UCS-4, unusual octet order BOM (2143)
|
||||
self.result = {'encoding': "X-ISO-10646-UCS-4-2143",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
elif byte_str.startswith((codecs.BOM_LE, codecs.BOM_BE)):
|
||||
# FF FE UTF-16, little endian BOM
|
||||
# FE FF UTF-16, big endian BOM
|
||||
self.result = {'encoding': "UTF-16",
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
|
||||
self._got_data = True
|
||||
if self.result['encoding'] is not None:
|
||||
self.done = True
|
||||
return
|
||||
|
||||
# If none of those matched and we've only see ASCII so far, check
|
||||
# for high bytes and escape sequences
|
||||
if self._input_state == InputState.PURE_ASCII:
|
||||
if self.HIGH_BYTE_DETECTOR.search(byte_str):
|
||||
self._input_state = InputState.HIGH_BYTE
|
||||
elif self._input_state == InputState.PURE_ASCII and \
|
||||
self.ESC_DETECTOR.search(self._last_char + byte_str):
|
||||
self._input_state = InputState.ESC_ASCII
|
||||
|
||||
self._last_char = byte_str[-1:]
|
||||
|
||||
# If we've seen escape sequences, use the EscCharSetProber, which
|
||||
# uses a simple state machine to check for known escape sequences in
|
||||
# HZ and ISO-2022 encodings, since those are the only encodings that
|
||||
# use such sequences.
|
||||
if self._input_state == InputState.ESC_ASCII:
|
||||
if not self._esc_charset_prober:
|
||||
self._esc_charset_prober = EscCharSetProber(self.lang_filter)
|
||||
if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT:
|
||||
self.result = {'encoding':
|
||||
self._esc_charset_prober.charset_name,
|
||||
'confidence':
|
||||
self._esc_charset_prober.get_confidence(),
|
||||
'language':
|
||||
self._esc_charset_prober.language}
|
||||
self.done = True
|
||||
# If we've seen high bytes (i.e., those with values greater than 127),
|
||||
# we need to do more complicated checks using all our multi-byte and
|
||||
# single-byte probers that are left. The single-byte probers
|
||||
# use character bigram distributions to determine the encoding, whereas
|
||||
# the multi-byte probers use a combination of character unigram and
|
||||
# bigram distributions.
|
||||
elif self._input_state == InputState.HIGH_BYTE:
|
||||
if not self._charset_probers:
|
||||
self._charset_probers = [MBCSGroupProber(self.lang_filter)]
|
||||
# If we're checking non-CJK encodings, use single-byte prober
|
||||
if self.lang_filter & LanguageFilter.NON_CJK:
|
||||
self._charset_probers.append(SBCSGroupProber())
|
||||
self._charset_probers.append(Latin1Prober())
|
||||
for prober in self._charset_probers:
|
||||
if prober.feed(byte_str) == ProbingState.FOUND_IT:
|
||||
self.result = {'encoding': prober.charset_name,
|
||||
'confidence': prober.get_confidence(),
|
||||
'language': prober.language}
|
||||
self.done = True
|
||||
break
|
||||
if self.WIN_BYTE_DETECTOR.search(byte_str):
|
||||
self._has_win_bytes = True
|
||||
|
||||
def close(self):
|
||||
"""
|
||||
Stop analyzing the current document and come up with a final
|
||||
prediction.
|
||||
|
||||
:returns: The ``result`` attribute, a ``dict`` with the keys
|
||||
`encoding`, `confidence`, and `language`.
|
||||
"""
|
||||
# Don't bother with checks if we're already done
|
||||
if self.done:
|
||||
return self.result
|
||||
self.done = True
|
||||
|
||||
if not self._got_data:
|
||||
self.logger.debug('no data received!')
|
||||
|
||||
# Default to ASCII if it is all we've seen so far
|
||||
elif self._input_state == InputState.PURE_ASCII:
|
||||
self.result = {'encoding': 'ascii',
|
||||
'confidence': 1.0,
|
||||
'language': ''}
|
||||
|
||||
# If we have seen non-ASCII, return the best that met MINIMUM_THRESHOLD
|
||||
elif self._input_state == InputState.HIGH_BYTE:
|
||||
prober_confidence = None
|
||||
max_prober_confidence = 0.0
|
||||
max_prober = None
|
||||
for prober in self._charset_probers:
|
||||
if not prober:
|
||||
continue
|
||||
prober_confidence = prober.get_confidence()
|
||||
if prober_confidence > max_prober_confidence:
|
||||
max_prober_confidence = prober_confidence
|
||||
max_prober = prober
|
||||
if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD):
|
||||
charset_name = max_prober.charset_name
|
||||
lower_charset_name = max_prober.charset_name.lower()
|
||||
confidence = max_prober.get_confidence()
|
||||
# Use Windows encoding name instead of ISO-8859 if we saw any
|
||||
# extra Windows-specific bytes
|
||||
if lower_charset_name.startswith('iso-8859'):
|
||||
if self._has_win_bytes:
|
||||
charset_name = self.ISO_WIN_MAP.get(lower_charset_name,
|
||||
charset_name)
|
||||
self.result = {'encoding': charset_name,
|
||||
'confidence': confidence,
|
||||
'language': max_prober.language}
|
||||
|
||||
# Log all prober confidences if none met MINIMUM_THRESHOLD
|
||||
if self.logger.getEffectiveLevel() <= logging.DEBUG:
|
||||
if self.result['encoding'] is None:
|
||||
self.logger.debug('no probers hit minimum threshold')
|
||||
for group_prober in self._charset_probers:
|
||||
if not group_prober:
|
||||
continue
|
||||
if isinstance(group_prober, CharSetGroupProber):
|
||||
for prober in group_prober.probers:
|
||||
self.logger.debug('%s %s confidence = %s',
|
||||
prober.charset_name,
|
||||
prober.language,
|
||||
prober.get_confidence())
|
||||
else:
|
||||
self.logger.debug('%s %s confidence = %s',
|
||||
group_prober.charset_name,
|
||||
group_prober.language,
|
||||
group_prober.get_confidence())
|
||||
return self.result
|
||||
82
venv/Lib/site-packages/pip/_vendor/chardet/utf8prober.py
Normal file
82
venv/Lib/site-packages/pip/_vendor/chardet/utf8prober.py
Normal file
@@ -0,0 +1,82 @@
|
||||
######################## BEGIN LICENSE BLOCK ########################
|
||||
# The Original Code is mozilla.org code.
|
||||
#
|
||||
# The Initial Developer of the Original Code is
|
||||
# Netscape Communications Corporation.
|
||||
# Portions created by the Initial Developer are Copyright (C) 1998
|
||||
# the Initial Developer. All Rights Reserved.
|
||||
#
|
||||
# Contributor(s):
|
||||
# Mark Pilgrim - port to Python
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
# License as published by the Free Software Foundation; either
|
||||
# version 2.1 of the License, or (at your option) any later version.
|
||||
#
|
||||
# This library is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public
|
||||
# License along with this library; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
|
||||
# 02110-1301 USA
|
||||
######################### END LICENSE BLOCK #########################
|
||||
|
||||
from .charsetprober import CharSetProber
|
||||
from .enums import ProbingState, MachineState
|
||||
from .codingstatemachine import CodingStateMachine
|
||||
from .mbcssm import UTF8_SM_MODEL
|
||||
|
||||
|
||||
|
||||
class UTF8Prober(CharSetProber):
|
||||
ONE_CHAR_PROB = 0.5
|
||||
|
||||
def __init__(self):
|
||||
super(UTF8Prober, self).__init__()
|
||||
self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
|
||||
self._num_mb_chars = None
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
super(UTF8Prober, self).reset()
|
||||
self.coding_sm.reset()
|
||||
self._num_mb_chars = 0
|
||||
|
||||
@property
|
||||
def charset_name(self):
|
||||
return "utf-8"
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
return ""
|
||||
|
||||
def feed(self, byte_str):
|
||||
for c in byte_str:
|
||||
coding_state = self.coding_sm.next_state(c)
|
||||
if coding_state == MachineState.ERROR:
|
||||
self._state = ProbingState.NOT_ME
|
||||
break
|
||||
elif coding_state == MachineState.ITS_ME:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
break
|
||||
elif coding_state == MachineState.START:
|
||||
if self.coding_sm.get_current_charlen() >= 2:
|
||||
self._num_mb_chars += 1
|
||||
|
||||
if self.state == ProbingState.DETECTING:
|
||||
if self.get_confidence() > self.SHORTCUT_THRESHOLD:
|
||||
self._state = ProbingState.FOUND_IT
|
||||
|
||||
return self.state
|
||||
|
||||
def get_confidence(self):
|
||||
unlike = 0.99
|
||||
if self._num_mb_chars < 6:
|
||||
unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars
|
||||
return 1.0 - unlike
|
||||
else:
|
||||
return unlike
|
||||
9
venv/Lib/site-packages/pip/_vendor/chardet/version.py
Normal file
9
venv/Lib/site-packages/pip/_vendor/chardet/version.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""
|
||||
This module exists only to simplify retrieving the version number of chardet
|
||||
from within setup.py and from chardet subpackages.
|
||||
|
||||
:author: Dan Blanchard (dan.blanchard@gmail.com)
|
||||
"""
|
||||
|
||||
__version__ = "4.0.0"
|
||||
VERSION = __version__.split('.')
|
||||
6
venv/Lib/site-packages/pip/_vendor/colorama/__init__.py
Normal file
6
venv/Lib/site-packages/pip/_vendor/colorama/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
||||
from .initialise import init, deinit, reinit, colorama_text
|
||||
from .ansi import Fore, Back, Style, Cursor
|
||||
from .ansitowin32 import AnsiToWin32
|
||||
|
||||
__version__ = '0.4.4'
|
||||
102
venv/Lib/site-packages/pip/_vendor/colorama/ansi.py
Normal file
102
venv/Lib/site-packages/pip/_vendor/colorama/ansi.py
Normal file
@@ -0,0 +1,102 @@
|
||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
||||
'''
|
||||
This module generates ANSI character codes to printing colors to terminals.
|
||||
See: http://en.wikipedia.org/wiki/ANSI_escape_code
|
||||
'''
|
||||
|
||||
CSI = '\033['
|
||||
OSC = '\033]'
|
||||
BEL = '\a'
|
||||
|
||||
|
||||
def code_to_chars(code):
|
||||
return CSI + str(code) + 'm'
|
||||
|
||||
def set_title(title):
|
||||
return OSC + '2;' + title + BEL
|
||||
|
||||
def clear_screen(mode=2):
|
||||
return CSI + str(mode) + 'J'
|
||||
|
||||
def clear_line(mode=2):
|
||||
return CSI + str(mode) + 'K'
|
||||
|
||||
|
||||
class AnsiCodes(object):
|
||||
def __init__(self):
|
||||
# the subclasses declare class attributes which are numbers.
|
||||
# Upon instantiation we define instance attributes, which are the same
|
||||
# as the class attributes but wrapped with the ANSI escape sequence
|
||||
for name in dir(self):
|
||||
if not name.startswith('_'):
|
||||
value = getattr(self, name)
|
||||
setattr(self, name, code_to_chars(value))
|
||||
|
||||
|
||||
class AnsiCursor(object):
|
||||
def UP(self, n=1):
|
||||
return CSI + str(n) + 'A'
|
||||
def DOWN(self, n=1):
|
||||
return CSI + str(n) + 'B'
|
||||
def FORWARD(self, n=1):
|
||||
return CSI + str(n) + 'C'
|
||||
def BACK(self, n=1):
|
||||
return CSI + str(n) + 'D'
|
||||
def POS(self, x=1, y=1):
|
||||
return CSI + str(y) + ';' + str(x) + 'H'
|
||||
|
||||
|
||||
class AnsiFore(AnsiCodes):
|
||||
BLACK = 30
|
||||
RED = 31
|
||||
GREEN = 32
|
||||
YELLOW = 33
|
||||
BLUE = 34
|
||||
MAGENTA = 35
|
||||
CYAN = 36
|
||||
WHITE = 37
|
||||
RESET = 39
|
||||
|
||||
# These are fairly well supported, but not part of the standard.
|
||||
LIGHTBLACK_EX = 90
|
||||
LIGHTRED_EX = 91
|
||||
LIGHTGREEN_EX = 92
|
||||
LIGHTYELLOW_EX = 93
|
||||
LIGHTBLUE_EX = 94
|
||||
LIGHTMAGENTA_EX = 95
|
||||
LIGHTCYAN_EX = 96
|
||||
LIGHTWHITE_EX = 97
|
||||
|
||||
|
||||
class AnsiBack(AnsiCodes):
|
||||
BLACK = 40
|
||||
RED = 41
|
||||
GREEN = 42
|
||||
YELLOW = 43
|
||||
BLUE = 44
|
||||
MAGENTA = 45
|
||||
CYAN = 46
|
||||
WHITE = 47
|
||||
RESET = 49
|
||||
|
||||
# These are fairly well supported, but not part of the standard.
|
||||
LIGHTBLACK_EX = 100
|
||||
LIGHTRED_EX = 101
|
||||
LIGHTGREEN_EX = 102
|
||||
LIGHTYELLOW_EX = 103
|
||||
LIGHTBLUE_EX = 104
|
||||
LIGHTMAGENTA_EX = 105
|
||||
LIGHTCYAN_EX = 106
|
||||
LIGHTWHITE_EX = 107
|
||||
|
||||
|
||||
class AnsiStyle(AnsiCodes):
|
||||
BRIGHT = 1
|
||||
DIM = 2
|
||||
NORMAL = 22
|
||||
RESET_ALL = 0
|
||||
|
||||
Fore = AnsiFore()
|
||||
Back = AnsiBack()
|
||||
Style = AnsiStyle()
|
||||
Cursor = AnsiCursor()
|
||||
258
venv/Lib/site-packages/pip/_vendor/colorama/ansitowin32.py
Normal file
258
venv/Lib/site-packages/pip/_vendor/colorama/ansitowin32.py
Normal file
@@ -0,0 +1,258 @@
|
||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
|
||||
from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style, BEL
|
||||
from .winterm import WinTerm, WinColor, WinStyle
|
||||
from .win32 import windll, winapi_test
|
||||
|
||||
|
||||
winterm = None
|
||||
if windll is not None:
|
||||
winterm = WinTerm()
|
||||
|
||||
|
||||
class StreamWrapper(object):
|
||||
'''
|
||||
Wraps a stream (such as stdout), acting as a transparent proxy for all
|
||||
attribute access apart from method 'write()', which is delegated to our
|
||||
Converter instance.
|
||||
'''
|
||||
def __init__(self, wrapped, converter):
|
||||
# double-underscore everything to prevent clashes with names of
|
||||
# attributes on the wrapped stream object.
|
||||
self.__wrapped = wrapped
|
||||
self.__convertor = converter
|
||||
|
||||
def __getattr__(self, name):
|
||||
return getattr(self.__wrapped, name)
|
||||
|
||||
def __enter__(self, *args, **kwargs):
|
||||
# special method lookup bypasses __getattr__/__getattribute__, see
|
||||
# https://stackoverflow.com/questions/12632894/why-doesnt-getattr-work-with-exit
|
||||
# thus, contextlib magic methods are not proxied via __getattr__
|
||||
return self.__wrapped.__enter__(*args, **kwargs)
|
||||
|
||||
def __exit__(self, *args, **kwargs):
|
||||
return self.__wrapped.__exit__(*args, **kwargs)
|
||||
|
||||
def write(self, text):
|
||||
self.__convertor.write(text)
|
||||
|
||||
def isatty(self):
|
||||
stream = self.__wrapped
|
||||
if 'PYCHARM_HOSTED' in os.environ:
|
||||
if stream is not None and (stream is sys.__stdout__ or stream is sys.__stderr__):
|
||||
return True
|
||||
try:
|
||||
stream_isatty = stream.isatty
|
||||
except AttributeError:
|
||||
return False
|
||||
else:
|
||||
return stream_isatty()
|
||||
|
||||
@property
|
||||
def closed(self):
|
||||
stream = self.__wrapped
|
||||
try:
|
||||
return stream.closed
|
||||
except AttributeError:
|
||||
return True
|
||||
|
||||
|
||||
class AnsiToWin32(object):
|
||||
'''
|
||||
Implements a 'write()' method which, on Windows, will strip ANSI character
|
||||
sequences from the text, and if outputting to a tty, will convert them into
|
||||
win32 function calls.
|
||||
'''
|
||||
ANSI_CSI_RE = re.compile('\001?\033\\[((?:\\d|;)*)([a-zA-Z])\002?') # Control Sequence Introducer
|
||||
ANSI_OSC_RE = re.compile('\001?\033\\]([^\a]*)(\a)\002?') # Operating System Command
|
||||
|
||||
def __init__(self, wrapped, convert=None, strip=None, autoreset=False):
|
||||
# The wrapped stream (normally sys.stdout or sys.stderr)
|
||||
self.wrapped = wrapped
|
||||
|
||||
# should we reset colors to defaults after every .write()
|
||||
self.autoreset = autoreset
|
||||
|
||||
# create the proxy wrapping our output stream
|
||||
self.stream = StreamWrapper(wrapped, self)
|
||||
|
||||
on_windows = os.name == 'nt'
|
||||
# We test if the WinAPI works, because even if we are on Windows
|
||||
# we may be using a terminal that doesn't support the WinAPI
|
||||
# (e.g. Cygwin Terminal). In this case it's up to the terminal
|
||||
# to support the ANSI codes.
|
||||
conversion_supported = on_windows and winapi_test()
|
||||
|
||||
# should we strip ANSI sequences from our output?
|
||||
if strip is None:
|
||||
strip = conversion_supported or (not self.stream.closed and not self.stream.isatty())
|
||||
self.strip = strip
|
||||
|
||||
# should we should convert ANSI sequences into win32 calls?
|
||||
if convert is None:
|
||||
convert = conversion_supported and not self.stream.closed and self.stream.isatty()
|
||||
self.convert = convert
|
||||
|
||||
# dict of ansi codes to win32 functions and parameters
|
||||
self.win32_calls = self.get_win32_calls()
|
||||
|
||||
# are we wrapping stderr?
|
||||
self.on_stderr = self.wrapped is sys.stderr
|
||||
|
||||
def should_wrap(self):
|
||||
'''
|
||||
True if this class is actually needed. If false, then the output
|
||||
stream will not be affected, nor will win32 calls be issued, so
|
||||
wrapping stdout is not actually required. This will generally be
|
||||
False on non-Windows platforms, unless optional functionality like
|
||||
autoreset has been requested using kwargs to init()
|
||||
'''
|
||||
return self.convert or self.strip or self.autoreset
|
||||
|
||||
def get_win32_calls(self):
|
||||
if self.convert and winterm:
|
||||
return {
|
||||
AnsiStyle.RESET_ALL: (winterm.reset_all, ),
|
||||
AnsiStyle.BRIGHT: (winterm.style, WinStyle.BRIGHT),
|
||||
AnsiStyle.DIM: (winterm.style, WinStyle.NORMAL),
|
||||
AnsiStyle.NORMAL: (winterm.style, WinStyle.NORMAL),
|
||||
AnsiFore.BLACK: (winterm.fore, WinColor.BLACK),
|
||||
AnsiFore.RED: (winterm.fore, WinColor.RED),
|
||||
AnsiFore.GREEN: (winterm.fore, WinColor.GREEN),
|
||||
AnsiFore.YELLOW: (winterm.fore, WinColor.YELLOW),
|
||||
AnsiFore.BLUE: (winterm.fore, WinColor.BLUE),
|
||||
AnsiFore.MAGENTA: (winterm.fore, WinColor.MAGENTA),
|
||||
AnsiFore.CYAN: (winterm.fore, WinColor.CYAN),
|
||||
AnsiFore.WHITE: (winterm.fore, WinColor.GREY),
|
||||
AnsiFore.RESET: (winterm.fore, ),
|
||||
AnsiFore.LIGHTBLACK_EX: (winterm.fore, WinColor.BLACK, True),
|
||||
AnsiFore.LIGHTRED_EX: (winterm.fore, WinColor.RED, True),
|
||||
AnsiFore.LIGHTGREEN_EX: (winterm.fore, WinColor.GREEN, True),
|
||||
AnsiFore.LIGHTYELLOW_EX: (winterm.fore, WinColor.YELLOW, True),
|
||||
AnsiFore.LIGHTBLUE_EX: (winterm.fore, WinColor.BLUE, True),
|
||||
AnsiFore.LIGHTMAGENTA_EX: (winterm.fore, WinColor.MAGENTA, True),
|
||||
AnsiFore.LIGHTCYAN_EX: (winterm.fore, WinColor.CYAN, True),
|
||||
AnsiFore.LIGHTWHITE_EX: (winterm.fore, WinColor.GREY, True),
|
||||
AnsiBack.BLACK: (winterm.back, WinColor.BLACK),
|
||||
AnsiBack.RED: (winterm.back, WinColor.RED),
|
||||
AnsiBack.GREEN: (winterm.back, WinColor.GREEN),
|
||||
AnsiBack.YELLOW: (winterm.back, WinColor.YELLOW),
|
||||
AnsiBack.BLUE: (winterm.back, WinColor.BLUE),
|
||||
AnsiBack.MAGENTA: (winterm.back, WinColor.MAGENTA),
|
||||
AnsiBack.CYAN: (winterm.back, WinColor.CYAN),
|
||||
AnsiBack.WHITE: (winterm.back, WinColor.GREY),
|
||||
AnsiBack.RESET: (winterm.back, ),
|
||||
AnsiBack.LIGHTBLACK_EX: (winterm.back, WinColor.BLACK, True),
|
||||
AnsiBack.LIGHTRED_EX: (winterm.back, WinColor.RED, True),
|
||||
AnsiBack.LIGHTGREEN_EX: (winterm.back, WinColor.GREEN, True),
|
||||
AnsiBack.LIGHTYELLOW_EX: (winterm.back, WinColor.YELLOW, True),
|
||||
AnsiBack.LIGHTBLUE_EX: (winterm.back, WinColor.BLUE, True),
|
||||
AnsiBack.LIGHTMAGENTA_EX: (winterm.back, WinColor.MAGENTA, True),
|
||||
AnsiBack.LIGHTCYAN_EX: (winterm.back, WinColor.CYAN, True),
|
||||
AnsiBack.LIGHTWHITE_EX: (winterm.back, WinColor.GREY, True),
|
||||
}
|
||||
return dict()
|
||||
|
||||
def write(self, text):
|
||||
if self.strip or self.convert:
|
||||
self.write_and_convert(text)
|
||||
else:
|
||||
self.wrapped.write(text)
|
||||
self.wrapped.flush()
|
||||
if self.autoreset:
|
||||
self.reset_all()
|
||||
|
||||
|
||||
def reset_all(self):
|
||||
if self.convert:
|
||||
self.call_win32('m', (0,))
|
||||
elif not self.strip and not self.stream.closed:
|
||||
self.wrapped.write(Style.RESET_ALL)
|
||||
|
||||
|
||||
def write_and_convert(self, text):
|
||||
'''
|
||||
Write the given text to our wrapped stream, stripping any ANSI
|
||||
sequences from the text, and optionally converting them into win32
|
||||
calls.
|
||||
'''
|
||||
cursor = 0
|
||||
text = self.convert_osc(text)
|
||||
for match in self.ANSI_CSI_RE.finditer(text):
|
||||
start, end = match.span()
|
||||
self.write_plain_text(text, cursor, start)
|
||||
self.convert_ansi(*match.groups())
|
||||
cursor = end
|
||||
self.write_plain_text(text, cursor, len(text))
|
||||
|
||||
|
||||
def write_plain_text(self, text, start, end):
|
||||
if start < end:
|
||||
self.wrapped.write(text[start:end])
|
||||
self.wrapped.flush()
|
||||
|
||||
|
||||
def convert_ansi(self, paramstring, command):
|
||||
if self.convert:
|
||||
params = self.extract_params(command, paramstring)
|
||||
self.call_win32(command, params)
|
||||
|
||||
|
||||
def extract_params(self, command, paramstring):
|
||||
if command in 'Hf':
|
||||
params = tuple(int(p) if len(p) != 0 else 1 for p in paramstring.split(';'))
|
||||
while len(params) < 2:
|
||||
# defaults:
|
||||
params = params + (1,)
|
||||
else:
|
||||
params = tuple(int(p) for p in paramstring.split(';') if len(p) != 0)
|
||||
if len(params) == 0:
|
||||
# defaults:
|
||||
if command in 'JKm':
|
||||
params = (0,)
|
||||
elif command in 'ABCD':
|
||||
params = (1,)
|
||||
|
||||
return params
|
||||
|
||||
|
||||
def call_win32(self, command, params):
|
||||
if command == 'm':
|
||||
for param in params:
|
||||
if param in self.win32_calls:
|
||||
func_args = self.win32_calls[param]
|
||||
func = func_args[0]
|
||||
args = func_args[1:]
|
||||
kwargs = dict(on_stderr=self.on_stderr)
|
||||
func(*args, **kwargs)
|
||||
elif command in 'J':
|
||||
winterm.erase_screen(params[0], on_stderr=self.on_stderr)
|
||||
elif command in 'K':
|
||||
winterm.erase_line(params[0], on_stderr=self.on_stderr)
|
||||
elif command in 'Hf': # cursor position - absolute
|
||||
winterm.set_cursor_position(params, on_stderr=self.on_stderr)
|
||||
elif command in 'ABCD': # cursor position - relative
|
||||
n = params[0]
|
||||
# A - up, B - down, C - forward, D - back
|
||||
x, y = {'A': (0, -n), 'B': (0, n), 'C': (n, 0), 'D': (-n, 0)}[command]
|
||||
winterm.cursor_adjust(x, y, on_stderr=self.on_stderr)
|
||||
|
||||
|
||||
def convert_osc(self, text):
|
||||
for match in self.ANSI_OSC_RE.finditer(text):
|
||||
start, end = match.span()
|
||||
text = text[:start] + text[end:]
|
||||
paramstring, command = match.groups()
|
||||
if command == BEL:
|
||||
if paramstring.count(";") == 1:
|
||||
params = paramstring.split(";")
|
||||
# 0 - change title and icon (we will only change title)
|
||||
# 1 - change icon (we don't support this)
|
||||
# 2 - change title
|
||||
if params[0] in '02':
|
||||
winterm.set_title(params[1])
|
||||
return text
|
||||
80
venv/Lib/site-packages/pip/_vendor/colorama/initialise.py
Normal file
80
venv/Lib/site-packages/pip/_vendor/colorama/initialise.py
Normal file
@@ -0,0 +1,80 @@
|
||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
||||
import atexit
|
||||
import contextlib
|
||||
import sys
|
||||
|
||||
from .ansitowin32 import AnsiToWin32
|
||||
|
||||
|
||||
orig_stdout = None
|
||||
orig_stderr = None
|
||||
|
||||
wrapped_stdout = None
|
||||
wrapped_stderr = None
|
||||
|
||||
atexit_done = False
|
||||
|
||||
|
||||
def reset_all():
|
||||
if AnsiToWin32 is not None: # Issue #74: objects might become None at exit
|
||||
AnsiToWin32(orig_stdout).reset_all()
|
||||
|
||||
|
||||
def init(autoreset=False, convert=None, strip=None, wrap=True):
|
||||
|
||||
if not wrap and any([autoreset, convert, strip]):
|
||||
raise ValueError('wrap=False conflicts with any other arg=True')
|
||||
|
||||
global wrapped_stdout, wrapped_stderr
|
||||
global orig_stdout, orig_stderr
|
||||
|
||||
orig_stdout = sys.stdout
|
||||
orig_stderr = sys.stderr
|
||||
|
||||
if sys.stdout is None:
|
||||
wrapped_stdout = None
|
||||
else:
|
||||
sys.stdout = wrapped_stdout = \
|
||||
wrap_stream(orig_stdout, convert, strip, autoreset, wrap)
|
||||
if sys.stderr is None:
|
||||
wrapped_stderr = None
|
||||
else:
|
||||
sys.stderr = wrapped_stderr = \
|
||||
wrap_stream(orig_stderr, convert, strip, autoreset, wrap)
|
||||
|
||||
global atexit_done
|
||||
if not atexit_done:
|
||||
atexit.register(reset_all)
|
||||
atexit_done = True
|
||||
|
||||
|
||||
def deinit():
|
||||
if orig_stdout is not None:
|
||||
sys.stdout = orig_stdout
|
||||
if orig_stderr is not None:
|
||||
sys.stderr = orig_stderr
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def colorama_text(*args, **kwargs):
|
||||
init(*args, **kwargs)
|
||||
try:
|
||||
yield
|
||||
finally:
|
||||
deinit()
|
||||
|
||||
|
||||
def reinit():
|
||||
if wrapped_stdout is not None:
|
||||
sys.stdout = wrapped_stdout
|
||||
if wrapped_stderr is not None:
|
||||
sys.stderr = wrapped_stderr
|
||||
|
||||
|
||||
def wrap_stream(stream, convert, strip, autoreset, wrap):
|
||||
if wrap:
|
||||
wrapper = AnsiToWin32(stream,
|
||||
convert=convert, strip=strip, autoreset=autoreset)
|
||||
if wrapper.should_wrap():
|
||||
stream = wrapper.stream
|
||||
return stream
|
||||
152
venv/Lib/site-packages/pip/_vendor/colorama/win32.py
Normal file
152
venv/Lib/site-packages/pip/_vendor/colorama/win32.py
Normal file
@@ -0,0 +1,152 @@
|
||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
||||
|
||||
# from winbase.h
|
||||
STDOUT = -11
|
||||
STDERR = -12
|
||||
|
||||
try:
|
||||
import ctypes
|
||||
from ctypes import LibraryLoader
|
||||
windll = LibraryLoader(ctypes.WinDLL)
|
||||
from ctypes import wintypes
|
||||
except (AttributeError, ImportError):
|
||||
windll = None
|
||||
SetConsoleTextAttribute = lambda *_: None
|
||||
winapi_test = lambda *_: None
|
||||
else:
|
||||
from ctypes import byref, Structure, c_char, POINTER
|
||||
|
||||
COORD = wintypes._COORD
|
||||
|
||||
class CONSOLE_SCREEN_BUFFER_INFO(Structure):
|
||||
"""struct in wincon.h."""
|
||||
_fields_ = [
|
||||
("dwSize", COORD),
|
||||
("dwCursorPosition", COORD),
|
||||
("wAttributes", wintypes.WORD),
|
||||
("srWindow", wintypes.SMALL_RECT),
|
||||
("dwMaximumWindowSize", COORD),
|
||||
]
|
||||
def __str__(self):
|
||||
return '(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)' % (
|
||||
self.dwSize.Y, self.dwSize.X
|
||||
, self.dwCursorPosition.Y, self.dwCursorPosition.X
|
||||
, self.wAttributes
|
||||
, self.srWindow.Top, self.srWindow.Left, self.srWindow.Bottom, self.srWindow.Right
|
||||
, self.dwMaximumWindowSize.Y, self.dwMaximumWindowSize.X
|
||||
)
|
||||
|
||||
_GetStdHandle = windll.kernel32.GetStdHandle
|
||||
_GetStdHandle.argtypes = [
|
||||
wintypes.DWORD,
|
||||
]
|
||||
_GetStdHandle.restype = wintypes.HANDLE
|
||||
|
||||
_GetConsoleScreenBufferInfo = windll.kernel32.GetConsoleScreenBufferInfo
|
||||
_GetConsoleScreenBufferInfo.argtypes = [
|
||||
wintypes.HANDLE,
|
||||
POINTER(CONSOLE_SCREEN_BUFFER_INFO),
|
||||
]
|
||||
_GetConsoleScreenBufferInfo.restype = wintypes.BOOL
|
||||
|
||||
_SetConsoleTextAttribute = windll.kernel32.SetConsoleTextAttribute
|
||||
_SetConsoleTextAttribute.argtypes = [
|
||||
wintypes.HANDLE,
|
||||
wintypes.WORD,
|
||||
]
|
||||
_SetConsoleTextAttribute.restype = wintypes.BOOL
|
||||
|
||||
_SetConsoleCursorPosition = windll.kernel32.SetConsoleCursorPosition
|
||||
_SetConsoleCursorPosition.argtypes = [
|
||||
wintypes.HANDLE,
|
||||
COORD,
|
||||
]
|
||||
_SetConsoleCursorPosition.restype = wintypes.BOOL
|
||||
|
||||
_FillConsoleOutputCharacterA = windll.kernel32.FillConsoleOutputCharacterA
|
||||
_FillConsoleOutputCharacterA.argtypes = [
|
||||
wintypes.HANDLE,
|
||||
c_char,
|
||||
wintypes.DWORD,
|
||||
COORD,
|
||||
POINTER(wintypes.DWORD),
|
||||
]
|
||||
_FillConsoleOutputCharacterA.restype = wintypes.BOOL
|
||||
|
||||
_FillConsoleOutputAttribute = windll.kernel32.FillConsoleOutputAttribute
|
||||
_FillConsoleOutputAttribute.argtypes = [
|
||||
wintypes.HANDLE,
|
||||
wintypes.WORD,
|
||||
wintypes.DWORD,
|
||||
COORD,
|
||||
POINTER(wintypes.DWORD),
|
||||
]
|
||||
_FillConsoleOutputAttribute.restype = wintypes.BOOL
|
||||
|
||||
_SetConsoleTitleW = windll.kernel32.SetConsoleTitleW
|
||||
_SetConsoleTitleW.argtypes = [
|
||||
wintypes.LPCWSTR
|
||||
]
|
||||
_SetConsoleTitleW.restype = wintypes.BOOL
|
||||
|
||||
def _winapi_test(handle):
|
||||
csbi = CONSOLE_SCREEN_BUFFER_INFO()
|
||||
success = _GetConsoleScreenBufferInfo(
|
||||
handle, byref(csbi))
|
||||
return bool(success)
|
||||
|
||||
def winapi_test():
|
||||
return any(_winapi_test(h) for h in
|
||||
(_GetStdHandle(STDOUT), _GetStdHandle(STDERR)))
|
||||
|
||||
def GetConsoleScreenBufferInfo(stream_id=STDOUT):
|
||||
handle = _GetStdHandle(stream_id)
|
||||
csbi = CONSOLE_SCREEN_BUFFER_INFO()
|
||||
success = _GetConsoleScreenBufferInfo(
|
||||
handle, byref(csbi))
|
||||
return csbi
|
||||
|
||||
def SetConsoleTextAttribute(stream_id, attrs):
|
||||
handle = _GetStdHandle(stream_id)
|
||||
return _SetConsoleTextAttribute(handle, attrs)
|
||||
|
||||
def SetConsoleCursorPosition(stream_id, position, adjust=True):
|
||||
position = COORD(*position)
|
||||
# If the position is out of range, do nothing.
|
||||
if position.Y <= 0 or position.X <= 0:
|
||||
return
|
||||
# Adjust for Windows' SetConsoleCursorPosition:
|
||||
# 1. being 0-based, while ANSI is 1-based.
|
||||
# 2. expecting (x,y), while ANSI uses (y,x).
|
||||
adjusted_position = COORD(position.Y - 1, position.X - 1)
|
||||
if adjust:
|
||||
# Adjust for viewport's scroll position
|
||||
sr = GetConsoleScreenBufferInfo(STDOUT).srWindow
|
||||
adjusted_position.Y += sr.Top
|
||||
adjusted_position.X += sr.Left
|
||||
# Resume normal processing
|
||||
handle = _GetStdHandle(stream_id)
|
||||
return _SetConsoleCursorPosition(handle, adjusted_position)
|
||||
|
||||
def FillConsoleOutputCharacter(stream_id, char, length, start):
|
||||
handle = _GetStdHandle(stream_id)
|
||||
char = c_char(char.encode())
|
||||
length = wintypes.DWORD(length)
|
||||
num_written = wintypes.DWORD(0)
|
||||
# Note that this is hard-coded for ANSI (vs wide) bytes.
|
||||
success = _FillConsoleOutputCharacterA(
|
||||
handle, char, length, start, byref(num_written))
|
||||
return num_written.value
|
||||
|
||||
def FillConsoleOutputAttribute(stream_id, attr, length, start):
|
||||
''' FillConsoleOutputAttribute( hConsole, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten )'''
|
||||
handle = _GetStdHandle(stream_id)
|
||||
attribute = wintypes.WORD(attr)
|
||||
length = wintypes.DWORD(length)
|
||||
num_written = wintypes.DWORD(0)
|
||||
# Note that this is hard-coded for ANSI (vs wide) bytes.
|
||||
return _FillConsoleOutputAttribute(
|
||||
handle, attribute, length, start, byref(num_written))
|
||||
|
||||
def SetConsoleTitle(title):
|
||||
return _SetConsoleTitleW(title)
|
||||
169
venv/Lib/site-packages/pip/_vendor/colorama/winterm.py
Normal file
169
venv/Lib/site-packages/pip/_vendor/colorama/winterm.py
Normal file
@@ -0,0 +1,169 @@
|
||||
# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
|
||||
from . import win32
|
||||
|
||||
|
||||
# from wincon.h
|
||||
class WinColor(object):
|
||||
BLACK = 0
|
||||
BLUE = 1
|
||||
GREEN = 2
|
||||
CYAN = 3
|
||||
RED = 4
|
||||
MAGENTA = 5
|
||||
YELLOW = 6
|
||||
GREY = 7
|
||||
|
||||
# from wincon.h
|
||||
class WinStyle(object):
|
||||
NORMAL = 0x00 # dim text, dim background
|
||||
BRIGHT = 0x08 # bright text, dim background
|
||||
BRIGHT_BACKGROUND = 0x80 # dim text, bright background
|
||||
|
||||
class WinTerm(object):
|
||||
|
||||
def __init__(self):
|
||||
self._default = win32.GetConsoleScreenBufferInfo(win32.STDOUT).wAttributes
|
||||
self.set_attrs(self._default)
|
||||
self._default_fore = self._fore
|
||||
self._default_back = self._back
|
||||
self._default_style = self._style
|
||||
# In order to emulate LIGHT_EX in windows, we borrow the BRIGHT style.
|
||||
# So that LIGHT_EX colors and BRIGHT style do not clobber each other,
|
||||
# we track them separately, since LIGHT_EX is overwritten by Fore/Back
|
||||
# and BRIGHT is overwritten by Style codes.
|
||||
self._light = 0
|
||||
|
||||
def get_attrs(self):
|
||||
return self._fore + self._back * 16 + (self._style | self._light)
|
||||
|
||||
def set_attrs(self, value):
|
||||
self._fore = value & 7
|
||||
self._back = (value >> 4) & 7
|
||||
self._style = value & (WinStyle.BRIGHT | WinStyle.BRIGHT_BACKGROUND)
|
||||
|
||||
def reset_all(self, on_stderr=None):
|
||||
self.set_attrs(self._default)
|
||||
self.set_console(attrs=self._default)
|
||||
self._light = 0
|
||||
|
||||
def fore(self, fore=None, light=False, on_stderr=False):
|
||||
if fore is None:
|
||||
fore = self._default_fore
|
||||
self._fore = fore
|
||||
# Emulate LIGHT_EX with BRIGHT Style
|
||||
if light:
|
||||
self._light |= WinStyle.BRIGHT
|
||||
else:
|
||||
self._light &= ~WinStyle.BRIGHT
|
||||
self.set_console(on_stderr=on_stderr)
|
||||
|
||||
def back(self, back=None, light=False, on_stderr=False):
|
||||
if back is None:
|
||||
back = self._default_back
|
||||
self._back = back
|
||||
# Emulate LIGHT_EX with BRIGHT_BACKGROUND Style
|
||||
if light:
|
||||
self._light |= WinStyle.BRIGHT_BACKGROUND
|
||||
else:
|
||||
self._light &= ~WinStyle.BRIGHT_BACKGROUND
|
||||
self.set_console(on_stderr=on_stderr)
|
||||
|
||||
def style(self, style=None, on_stderr=False):
|
||||
if style is None:
|
||||
style = self._default_style
|
||||
self._style = style
|
||||
self.set_console(on_stderr=on_stderr)
|
||||
|
||||
def set_console(self, attrs=None, on_stderr=False):
|
||||
if attrs is None:
|
||||
attrs = self.get_attrs()
|
||||
handle = win32.STDOUT
|
||||
if on_stderr:
|
||||
handle = win32.STDERR
|
||||
win32.SetConsoleTextAttribute(handle, attrs)
|
||||
|
||||
def get_position(self, handle):
|
||||
position = win32.GetConsoleScreenBufferInfo(handle).dwCursorPosition
|
||||
# Because Windows coordinates are 0-based,
|
||||
# and win32.SetConsoleCursorPosition expects 1-based.
|
||||
position.X += 1
|
||||
position.Y += 1
|
||||
return position
|
||||
|
||||
def set_cursor_position(self, position=None, on_stderr=False):
|
||||
if position is None:
|
||||
# I'm not currently tracking the position, so there is no default.
|
||||
# position = self.get_position()
|
||||
return
|
||||
handle = win32.STDOUT
|
||||
if on_stderr:
|
||||
handle = win32.STDERR
|
||||
win32.SetConsoleCursorPosition(handle, position)
|
||||
|
||||
def cursor_adjust(self, x, y, on_stderr=False):
|
||||
handle = win32.STDOUT
|
||||
if on_stderr:
|
||||
handle = win32.STDERR
|
||||
position = self.get_position(handle)
|
||||
adjusted_position = (position.Y + y, position.X + x)
|
||||
win32.SetConsoleCursorPosition(handle, adjusted_position, adjust=False)
|
||||
|
||||
def erase_screen(self, mode=0, on_stderr=False):
|
||||
# 0 should clear from the cursor to the end of the screen.
|
||||
# 1 should clear from the cursor to the beginning of the screen.
|
||||
# 2 should clear the entire screen, and move cursor to (1,1)
|
||||
handle = win32.STDOUT
|
||||
if on_stderr:
|
||||
handle = win32.STDERR
|
||||
csbi = win32.GetConsoleScreenBufferInfo(handle)
|
||||
# get the number of character cells in the current buffer
|
||||
cells_in_screen = csbi.dwSize.X * csbi.dwSize.Y
|
||||
# get number of character cells before current cursor position
|
||||
cells_before_cursor = csbi.dwSize.X * csbi.dwCursorPosition.Y + csbi.dwCursorPosition.X
|
||||
if mode == 0:
|
||||
from_coord = csbi.dwCursorPosition
|
||||
cells_to_erase = cells_in_screen - cells_before_cursor
|
||||
elif mode == 1:
|
||||
from_coord = win32.COORD(0, 0)
|
||||
cells_to_erase = cells_before_cursor
|
||||
elif mode == 2:
|
||||
from_coord = win32.COORD(0, 0)
|
||||
cells_to_erase = cells_in_screen
|
||||
else:
|
||||
# invalid mode
|
||||
return
|
||||
# fill the entire screen with blanks
|
||||
win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord)
|
||||
# now set the buffer's attributes accordingly
|
||||
win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord)
|
||||
if mode == 2:
|
||||
# put the cursor where needed
|
||||
win32.SetConsoleCursorPosition(handle, (1, 1))
|
||||
|
||||
def erase_line(self, mode=0, on_stderr=False):
|
||||
# 0 should clear from the cursor to the end of the line.
|
||||
# 1 should clear from the cursor to the beginning of the line.
|
||||
# 2 should clear the entire line.
|
||||
handle = win32.STDOUT
|
||||
if on_stderr:
|
||||
handle = win32.STDERR
|
||||
csbi = win32.GetConsoleScreenBufferInfo(handle)
|
||||
if mode == 0:
|
||||
from_coord = csbi.dwCursorPosition
|
||||
cells_to_erase = csbi.dwSize.X - csbi.dwCursorPosition.X
|
||||
elif mode == 1:
|
||||
from_coord = win32.COORD(0, csbi.dwCursorPosition.Y)
|
||||
cells_to_erase = csbi.dwCursorPosition.X
|
||||
elif mode == 2:
|
||||
from_coord = win32.COORD(0, csbi.dwCursorPosition.Y)
|
||||
cells_to_erase = csbi.dwSize.X
|
||||
else:
|
||||
# invalid mode
|
||||
return
|
||||
# fill the entire screen with blanks
|
||||
win32.FillConsoleOutputCharacter(handle, ' ', cells_to_erase, from_coord)
|
||||
# now set the buffer's attributes accordingly
|
||||
win32.FillConsoleOutputAttribute(handle, self.get_attrs(), cells_to_erase, from_coord)
|
||||
|
||||
def set_title(self, title):
|
||||
win32.SetConsoleTitle(title)
|
||||
23
venv/Lib/site-packages/pip/_vendor/distlib/__init__.py
Normal file
23
venv/Lib/site-packages/pip/_vendor/distlib/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2012-2019 Vinay Sajip.
|
||||
# Licensed to the Python Software Foundation under a contributor agreement.
|
||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
||||
#
|
||||
import logging
|
||||
|
||||
__version__ = '0.3.2'
|
||||
|
||||
class DistlibException(Exception):
|
||||
pass
|
||||
|
||||
try:
|
||||
from logging import NullHandler
|
||||
except ImportError: # pragma: no cover
|
||||
class NullHandler(logging.Handler):
|
||||
def handle(self, record): pass
|
||||
def emit(self, record): pass
|
||||
def createLock(self): self.lock = None
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.addHandler(NullHandler())
|
||||
@@ -0,0 +1,6 @@
|
||||
"""Modules copied from Python 3 standard libraries, for internal use only.
|
||||
|
||||
Individual classes and functions are found in d2._backport.misc. Intended
|
||||
usage is to always import things missing from 3.1 from that module: the
|
||||
built-in/stdlib objects will be used if found.
|
||||
"""
|
||||
41
venv/Lib/site-packages/pip/_vendor/distlib/_backport/misc.py
Normal file
41
venv/Lib/site-packages/pip/_vendor/distlib/_backport/misc.py
Normal file
@@ -0,0 +1,41 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2012 The Python Software Foundation.
|
||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
||||
#
|
||||
"""Backports for individual classes and functions."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
__all__ = ['cache_from_source', 'callable', 'fsencode']
|
||||
|
||||
|
||||
try:
|
||||
from imp import cache_from_source
|
||||
except ImportError:
|
||||
def cache_from_source(py_file, debug=__debug__):
|
||||
ext = debug and 'c' or 'o'
|
||||
return py_file + ext
|
||||
|
||||
|
||||
try:
|
||||
callable = callable
|
||||
except NameError:
|
||||
from collections import Callable
|
||||
|
||||
def callable(obj):
|
||||
return isinstance(obj, Callable)
|
||||
|
||||
|
||||
try:
|
||||
fsencode = os.fsencode
|
||||
except AttributeError:
|
||||
def fsencode(filename):
|
||||
if isinstance(filename, bytes):
|
||||
return filename
|
||||
elif isinstance(filename, str):
|
||||
return filename.encode(sys.getfilesystemencoding())
|
||||
else:
|
||||
raise TypeError("expect bytes or str, not %s" %
|
||||
type(filename).__name__)
|
||||
764
venv/Lib/site-packages/pip/_vendor/distlib/_backport/shutil.py
Normal file
764
venv/Lib/site-packages/pip/_vendor/distlib/_backport/shutil.py
Normal file
@@ -0,0 +1,764 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2012 The Python Software Foundation.
|
||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
||||
#
|
||||
"""Utility functions for copying and archiving files and directory trees.
|
||||
|
||||
XXX The functions here don't copy the resource fork or other metadata on Mac.
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import stat
|
||||
from os.path import abspath
|
||||
import fnmatch
|
||||
try:
|
||||
from collections.abc import Callable
|
||||
except ImportError:
|
||||
from collections import Callable
|
||||
import errno
|
||||
from . import tarfile
|
||||
|
||||
try:
|
||||
import bz2
|
||||
_BZ2_SUPPORTED = True
|
||||
except ImportError:
|
||||
_BZ2_SUPPORTED = False
|
||||
|
||||
try:
|
||||
from pwd import getpwnam
|
||||
except ImportError:
|
||||
getpwnam = None
|
||||
|
||||
try:
|
||||
from grp import getgrnam
|
||||
except ImportError:
|
||||
getgrnam = None
|
||||
|
||||
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
|
||||
"copytree", "move", "rmtree", "Error", "SpecialFileError",
|
||||
"ExecError", "make_archive", "get_archive_formats",
|
||||
"register_archive_format", "unregister_archive_format",
|
||||
"get_unpack_formats", "register_unpack_format",
|
||||
"unregister_unpack_format", "unpack_archive", "ignore_patterns"]
|
||||
|
||||
class Error(EnvironmentError):
|
||||
pass
|
||||
|
||||
class SpecialFileError(EnvironmentError):
|
||||
"""Raised when trying to do a kind of operation (e.g. copying) which is
|
||||
not supported on a special file (e.g. a named pipe)"""
|
||||
|
||||
class ExecError(EnvironmentError):
|
||||
"""Raised when a command could not be executed"""
|
||||
|
||||
class ReadError(EnvironmentError):
|
||||
"""Raised when an archive cannot be read"""
|
||||
|
||||
class RegistryError(Exception):
|
||||
"""Raised when a registry operation with the archiving
|
||||
and unpacking registries fails"""
|
||||
|
||||
|
||||
try:
|
||||
WindowsError
|
||||
except NameError:
|
||||
WindowsError = None
|
||||
|
||||
def copyfileobj(fsrc, fdst, length=16*1024):
|
||||
"""copy data from file-like object fsrc to file-like object fdst"""
|
||||
while 1:
|
||||
buf = fsrc.read(length)
|
||||
if not buf:
|
||||
break
|
||||
fdst.write(buf)
|
||||
|
||||
def _samefile(src, dst):
|
||||
# Macintosh, Unix.
|
||||
if hasattr(os.path, 'samefile'):
|
||||
try:
|
||||
return os.path.samefile(src, dst)
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
# All other platforms: check for same pathname.
|
||||
return (os.path.normcase(os.path.abspath(src)) ==
|
||||
os.path.normcase(os.path.abspath(dst)))
|
||||
|
||||
def copyfile(src, dst):
|
||||
"""Copy data from src to dst"""
|
||||
if _samefile(src, dst):
|
||||
raise Error("`%s` and `%s` are the same file" % (src, dst))
|
||||
|
||||
for fn in [src, dst]:
|
||||
try:
|
||||
st = os.stat(fn)
|
||||
except OSError:
|
||||
# File most likely does not exist
|
||||
pass
|
||||
else:
|
||||
# XXX What about other special files? (sockets, devices...)
|
||||
if stat.S_ISFIFO(st.st_mode):
|
||||
raise SpecialFileError("`%s` is a named pipe" % fn)
|
||||
|
||||
with open(src, 'rb') as fsrc:
|
||||
with open(dst, 'wb') as fdst:
|
||||
copyfileobj(fsrc, fdst)
|
||||
|
||||
def copymode(src, dst):
|
||||
"""Copy mode bits from src to dst"""
|
||||
if hasattr(os, 'chmod'):
|
||||
st = os.stat(src)
|
||||
mode = stat.S_IMODE(st.st_mode)
|
||||
os.chmod(dst, mode)
|
||||
|
||||
def copystat(src, dst):
|
||||
"""Copy all stat info (mode bits, atime, mtime, flags) from src to dst"""
|
||||
st = os.stat(src)
|
||||
mode = stat.S_IMODE(st.st_mode)
|
||||
if hasattr(os, 'utime'):
|
||||
os.utime(dst, (st.st_atime, st.st_mtime))
|
||||
if hasattr(os, 'chmod'):
|
||||
os.chmod(dst, mode)
|
||||
if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
|
||||
try:
|
||||
os.chflags(dst, st.st_flags)
|
||||
except OSError as why:
|
||||
if (not hasattr(errno, 'EOPNOTSUPP') or
|
||||
why.errno != errno.EOPNOTSUPP):
|
||||
raise
|
||||
|
||||
def copy(src, dst):
|
||||
"""Copy data and mode bits ("cp src dst").
|
||||
|
||||
The destination may be a directory.
|
||||
|
||||
"""
|
||||
if os.path.isdir(dst):
|
||||
dst = os.path.join(dst, os.path.basename(src))
|
||||
copyfile(src, dst)
|
||||
copymode(src, dst)
|
||||
|
||||
def copy2(src, dst):
|
||||
"""Copy data and all stat info ("cp -p src dst").
|
||||
|
||||
The destination may be a directory.
|
||||
|
||||
"""
|
||||
if os.path.isdir(dst):
|
||||
dst = os.path.join(dst, os.path.basename(src))
|
||||
copyfile(src, dst)
|
||||
copystat(src, dst)
|
||||
|
||||
def ignore_patterns(*patterns):
|
||||
"""Function that can be used as copytree() ignore parameter.
|
||||
|
||||
Patterns is a sequence of glob-style patterns
|
||||
that are used to exclude files"""
|
||||
def _ignore_patterns(path, names):
|
||||
ignored_names = []
|
||||
for pattern in patterns:
|
||||
ignored_names.extend(fnmatch.filter(names, pattern))
|
||||
return set(ignored_names)
|
||||
return _ignore_patterns
|
||||
|
||||
def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
|
||||
ignore_dangling_symlinks=False):
|
||||
"""Recursively copy a directory tree.
|
||||
|
||||
The destination directory must not already exist.
|
||||
If exception(s) occur, an Error is raised with a list of reasons.
|
||||
|
||||
If the optional symlinks flag is true, symbolic links in the
|
||||
source tree result in symbolic links in the destination tree; if
|
||||
it is false, the contents of the files pointed to by symbolic
|
||||
links are copied. If the file pointed by the symlink doesn't
|
||||
exist, an exception will be added in the list of errors raised in
|
||||
an Error exception at the end of the copy process.
|
||||
|
||||
You can set the optional ignore_dangling_symlinks flag to true if you
|
||||
want to silence this exception. Notice that this has no effect on
|
||||
platforms that don't support os.symlink.
|
||||
|
||||
The optional ignore argument is a callable. If given, it
|
||||
is called with the `src` parameter, which is the directory
|
||||
being visited by copytree(), and `names` which is the list of
|
||||
`src` contents, as returned by os.listdir():
|
||||
|
||||
callable(src, names) -> ignored_names
|
||||
|
||||
Since copytree() is called recursively, the callable will be
|
||||
called once for each directory that is copied. It returns a
|
||||
list of names relative to the `src` directory that should
|
||||
not be copied.
|
||||
|
||||
The optional copy_function argument is a callable that will be used
|
||||
to copy each file. It will be called with the source path and the
|
||||
destination path as arguments. By default, copy2() is used, but any
|
||||
function that supports the same signature (like copy()) can be used.
|
||||
|
||||
"""
|
||||
names = os.listdir(src)
|
||||
if ignore is not None:
|
||||
ignored_names = ignore(src, names)
|
||||
else:
|
||||
ignored_names = set()
|
||||
|
||||
os.makedirs(dst)
|
||||
errors = []
|
||||
for name in names:
|
||||
if name in ignored_names:
|
||||
continue
|
||||
srcname = os.path.join(src, name)
|
||||
dstname = os.path.join(dst, name)
|
||||
try:
|
||||
if os.path.islink(srcname):
|
||||
linkto = os.readlink(srcname)
|
||||
if symlinks:
|
||||
os.symlink(linkto, dstname)
|
||||
else:
|
||||
# ignore dangling symlink if the flag is on
|
||||
if not os.path.exists(linkto) and ignore_dangling_symlinks:
|
||||
continue
|
||||
# otherwise let the copy occurs. copy2 will raise an error
|
||||
copy_function(srcname, dstname)
|
||||
elif os.path.isdir(srcname):
|
||||
copytree(srcname, dstname, symlinks, ignore, copy_function)
|
||||
else:
|
||||
# Will raise a SpecialFileError for unsupported file types
|
||||
copy_function(srcname, dstname)
|
||||
# catch the Error from the recursive copytree so that we can
|
||||
# continue with other files
|
||||
except Error as err:
|
||||
errors.extend(err.args[0])
|
||||
except EnvironmentError as why:
|
||||
errors.append((srcname, dstname, str(why)))
|
||||
try:
|
||||
copystat(src, dst)
|
||||
except OSError as why:
|
||||
if WindowsError is not None and isinstance(why, WindowsError):
|
||||
# Copying file access times may fail on Windows
|
||||
pass
|
||||
else:
|
||||
errors.extend((src, dst, str(why)))
|
||||
if errors:
|
||||
raise Error(errors)
|
||||
|
||||
def rmtree(path, ignore_errors=False, onerror=None):
|
||||
"""Recursively delete a directory tree.
|
||||
|
||||
If ignore_errors is set, errors are ignored; otherwise, if onerror
|
||||
is set, it is called to handle the error with arguments (func,
|
||||
path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
|
||||
path is the argument to that function that caused it to fail; and
|
||||
exc_info is a tuple returned by sys.exc_info(). If ignore_errors
|
||||
is false and onerror is None, an exception is raised.
|
||||
|
||||
"""
|
||||
if ignore_errors:
|
||||
def onerror(*args):
|
||||
pass
|
||||
elif onerror is None:
|
||||
def onerror(*args):
|
||||
raise
|
||||
try:
|
||||
if os.path.islink(path):
|
||||
# symlinks to directories are forbidden, see bug #1669
|
||||
raise OSError("Cannot call rmtree on a symbolic link")
|
||||
except OSError:
|
||||
onerror(os.path.islink, path, sys.exc_info())
|
||||
# can't continue even if onerror hook returns
|
||||
return
|
||||
names = []
|
||||
try:
|
||||
names = os.listdir(path)
|
||||
except os.error:
|
||||
onerror(os.listdir, path, sys.exc_info())
|
||||
for name in names:
|
||||
fullname = os.path.join(path, name)
|
||||
try:
|
||||
mode = os.lstat(fullname).st_mode
|
||||
except os.error:
|
||||
mode = 0
|
||||
if stat.S_ISDIR(mode):
|
||||
rmtree(fullname, ignore_errors, onerror)
|
||||
else:
|
||||
try:
|
||||
os.remove(fullname)
|
||||
except os.error:
|
||||
onerror(os.remove, fullname, sys.exc_info())
|
||||
try:
|
||||
os.rmdir(path)
|
||||
except os.error:
|
||||
onerror(os.rmdir, path, sys.exc_info())
|
||||
|
||||
|
||||
def _basename(path):
|
||||
# A basename() variant which first strips the trailing slash, if present.
|
||||
# Thus we always get the last component of the path, even for directories.
|
||||
return os.path.basename(path.rstrip(os.path.sep))
|
||||
|
||||
def move(src, dst):
|
||||
"""Recursively move a file or directory to another location. This is
|
||||
similar to the Unix "mv" command.
|
||||
|
||||
If the destination is a directory or a symlink to a directory, the source
|
||||
is moved inside the directory. The destination path must not already
|
||||
exist.
|
||||
|
||||
If the destination already exists but is not a directory, it may be
|
||||
overwritten depending on os.rename() semantics.
|
||||
|
||||
If the destination is on our current filesystem, then rename() is used.
|
||||
Otherwise, src is copied to the destination and then removed.
|
||||
A lot more could be done here... A look at a mv.c shows a lot of
|
||||
the issues this implementation glosses over.
|
||||
|
||||
"""
|
||||
real_dst = dst
|
||||
if os.path.isdir(dst):
|
||||
if _samefile(src, dst):
|
||||
# We might be on a case insensitive filesystem,
|
||||
# perform the rename anyway.
|
||||
os.rename(src, dst)
|
||||
return
|
||||
|
||||
real_dst = os.path.join(dst, _basename(src))
|
||||
if os.path.exists(real_dst):
|
||||
raise Error("Destination path '%s' already exists" % real_dst)
|
||||
try:
|
||||
os.rename(src, real_dst)
|
||||
except OSError:
|
||||
if os.path.isdir(src):
|
||||
if _destinsrc(src, dst):
|
||||
raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst))
|
||||
copytree(src, real_dst, symlinks=True)
|
||||
rmtree(src)
|
||||
else:
|
||||
copy2(src, real_dst)
|
||||
os.unlink(src)
|
||||
|
||||
def _destinsrc(src, dst):
|
||||
src = abspath(src)
|
||||
dst = abspath(dst)
|
||||
if not src.endswith(os.path.sep):
|
||||
src += os.path.sep
|
||||
if not dst.endswith(os.path.sep):
|
||||
dst += os.path.sep
|
||||
return dst.startswith(src)
|
||||
|
||||
def _get_gid(name):
|
||||
"""Returns a gid, given a group name."""
|
||||
if getgrnam is None or name is None:
|
||||
return None
|
||||
try:
|
||||
result = getgrnam(name)
|
||||
except KeyError:
|
||||
result = None
|
||||
if result is not None:
|
||||
return result[2]
|
||||
return None
|
||||
|
||||
def _get_uid(name):
|
||||
"""Returns an uid, given a user name."""
|
||||
if getpwnam is None or name is None:
|
||||
return None
|
||||
try:
|
||||
result = getpwnam(name)
|
||||
except KeyError:
|
||||
result = None
|
||||
if result is not None:
|
||||
return result[2]
|
||||
return None
|
||||
|
||||
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
|
||||
owner=None, group=None, logger=None):
|
||||
"""Create a (possibly compressed) tar file from all the files under
|
||||
'base_dir'.
|
||||
|
||||
'compress' must be "gzip" (the default), "bzip2", or None.
|
||||
|
||||
'owner' and 'group' can be used to define an owner and a group for the
|
||||
archive that is being built. If not provided, the current owner and group
|
||||
will be used.
|
||||
|
||||
The output tar file will be named 'base_name' + ".tar", possibly plus
|
||||
the appropriate compression extension (".gz", or ".bz2").
|
||||
|
||||
Returns the output filename.
|
||||
"""
|
||||
tar_compression = {'gzip': 'gz', None: ''}
|
||||
compress_ext = {'gzip': '.gz'}
|
||||
|
||||
if _BZ2_SUPPORTED:
|
||||
tar_compression['bzip2'] = 'bz2'
|
||||
compress_ext['bzip2'] = '.bz2'
|
||||
|
||||
# flags for compression program, each element of list will be an argument
|
||||
if compress is not None and compress not in compress_ext:
|
||||
raise ValueError("bad value for 'compress', or compression format not "
|
||||
"supported : {0}".format(compress))
|
||||
|
||||
archive_name = base_name + '.tar' + compress_ext.get(compress, '')
|
||||
archive_dir = os.path.dirname(archive_name)
|
||||
|
||||
if not os.path.exists(archive_dir):
|
||||
if logger is not None:
|
||||
logger.info("creating %s", archive_dir)
|
||||
if not dry_run:
|
||||
os.makedirs(archive_dir)
|
||||
|
||||
# creating the tarball
|
||||
if logger is not None:
|
||||
logger.info('Creating tar archive')
|
||||
|
||||
uid = _get_uid(owner)
|
||||
gid = _get_gid(group)
|
||||
|
||||
def _set_uid_gid(tarinfo):
|
||||
if gid is not None:
|
||||
tarinfo.gid = gid
|
||||
tarinfo.gname = group
|
||||
if uid is not None:
|
||||
tarinfo.uid = uid
|
||||
tarinfo.uname = owner
|
||||
return tarinfo
|
||||
|
||||
if not dry_run:
|
||||
tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
|
||||
try:
|
||||
tar.add(base_dir, filter=_set_uid_gid)
|
||||
finally:
|
||||
tar.close()
|
||||
|
||||
return archive_name
|
||||
|
||||
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
|
||||
# XXX see if we want to keep an external call here
|
||||
if verbose:
|
||||
zipoptions = "-r"
|
||||
else:
|
||||
zipoptions = "-rq"
|
||||
from distutils.errors import DistutilsExecError
|
||||
from distutils.spawn import spawn
|
||||
try:
|
||||
spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
|
||||
except DistutilsExecError:
|
||||
# XXX really should distinguish between "couldn't find
|
||||
# external 'zip' command" and "zip failed".
|
||||
raise ExecError("unable to create zip file '%s': "
|
||||
"could neither import the 'zipfile' module nor "
|
||||
"find a standalone zip utility") % zip_filename
|
||||
|
||||
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
|
||||
"""Create a zip file from all the files under 'base_dir'.
|
||||
|
||||
The output zip file will be named 'base_name' + ".zip". Uses either the
|
||||
"zipfile" Python module (if available) or the InfoZIP "zip" utility
|
||||
(if installed and found on the default search path). If neither tool is
|
||||
available, raises ExecError. Returns the name of the output zip
|
||||
file.
|
||||
"""
|
||||
zip_filename = base_name + ".zip"
|
||||
archive_dir = os.path.dirname(base_name)
|
||||
|
||||
if not os.path.exists(archive_dir):
|
||||
if logger is not None:
|
||||
logger.info("creating %s", archive_dir)
|
||||
if not dry_run:
|
||||
os.makedirs(archive_dir)
|
||||
|
||||
# If zipfile module is not available, try spawning an external 'zip'
|
||||
# command.
|
||||
try:
|
||||
import zipfile
|
||||
except ImportError:
|
||||
zipfile = None
|
||||
|
||||
if zipfile is None:
|
||||
_call_external_zip(base_dir, zip_filename, verbose, dry_run)
|
||||
else:
|
||||
if logger is not None:
|
||||
logger.info("creating '%s' and adding '%s' to it",
|
||||
zip_filename, base_dir)
|
||||
|
||||
if not dry_run:
|
||||
zip = zipfile.ZipFile(zip_filename, "w",
|
||||
compression=zipfile.ZIP_DEFLATED)
|
||||
|
||||
for dirpath, dirnames, filenames in os.walk(base_dir):
|
||||
for name in filenames:
|
||||
path = os.path.normpath(os.path.join(dirpath, name))
|
||||
if os.path.isfile(path):
|
||||
zip.write(path, path)
|
||||
if logger is not None:
|
||||
logger.info("adding '%s'", path)
|
||||
zip.close()
|
||||
|
||||
return zip_filename
|
||||
|
||||
_ARCHIVE_FORMATS = {
|
||||
'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
|
||||
'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
|
||||
'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"),
|
||||
'zip': (_make_zipfile, [], "ZIP file"),
|
||||
}
|
||||
|
||||
if _BZ2_SUPPORTED:
|
||||
_ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')],
|
||||
"bzip2'ed tar-file")
|
||||
|
||||
def get_archive_formats():
|
||||
"""Returns a list of supported formats for archiving and unarchiving.
|
||||
|
||||
Each element of the returned sequence is a tuple (name, description)
|
||||
"""
|
||||
formats = [(name, registry[2]) for name, registry in
|
||||
_ARCHIVE_FORMATS.items()]
|
||||
formats.sort()
|
||||
return formats
|
||||
|
||||
def register_archive_format(name, function, extra_args=None, description=''):
|
||||
"""Registers an archive format.
|
||||
|
||||
name is the name of the format. function is the callable that will be
|
||||
used to create archives. If provided, extra_args is a sequence of
|
||||
(name, value) tuples that will be passed as arguments to the callable.
|
||||
description can be provided to describe the format, and will be returned
|
||||
by the get_archive_formats() function.
|
||||
"""
|
||||
if extra_args is None:
|
||||
extra_args = []
|
||||
if not isinstance(function, Callable):
|
||||
raise TypeError('The %s object is not callable' % function)
|
||||
if not isinstance(extra_args, (tuple, list)):
|
||||
raise TypeError('extra_args needs to be a sequence')
|
||||
for element in extra_args:
|
||||
if not isinstance(element, (tuple, list)) or len(element) !=2:
|
||||
raise TypeError('extra_args elements are : (arg_name, value)')
|
||||
|
||||
_ARCHIVE_FORMATS[name] = (function, extra_args, description)
|
||||
|
||||
def unregister_archive_format(name):
|
||||
del _ARCHIVE_FORMATS[name]
|
||||
|
||||
def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0,
|
||||
dry_run=0, owner=None, group=None, logger=None):
|
||||
"""Create an archive file (eg. zip or tar).
|
||||
|
||||
'base_name' is the name of the file to create, minus any format-specific
|
||||
extension; 'format' is the archive format: one of "zip", "tar", "bztar"
|
||||
or "gztar".
|
||||
|
||||
'root_dir' is a directory that will be the root directory of the
|
||||
archive; ie. we typically chdir into 'root_dir' before creating the
|
||||
archive. 'base_dir' is the directory where we start archiving from;
|
||||
ie. 'base_dir' will be the common prefix of all files and
|
||||
directories in the archive. 'root_dir' and 'base_dir' both default
|
||||
to the current directory. Returns the name of the archive file.
|
||||
|
||||
'owner' and 'group' are used when creating a tar archive. By default,
|
||||
uses the current owner and group.
|
||||
"""
|
||||
save_cwd = os.getcwd()
|
||||
if root_dir is not None:
|
||||
if logger is not None:
|
||||
logger.debug("changing into '%s'", root_dir)
|
||||
base_name = os.path.abspath(base_name)
|
||||
if not dry_run:
|
||||
os.chdir(root_dir)
|
||||
|
||||
if base_dir is None:
|
||||
base_dir = os.curdir
|
||||
|
||||
kwargs = {'dry_run': dry_run, 'logger': logger}
|
||||
|
||||
try:
|
||||
format_info = _ARCHIVE_FORMATS[format]
|
||||
except KeyError:
|
||||
raise ValueError("unknown archive format '%s'" % format)
|
||||
|
||||
func = format_info[0]
|
||||
for arg, val in format_info[1]:
|
||||
kwargs[arg] = val
|
||||
|
||||
if format != 'zip':
|
||||
kwargs['owner'] = owner
|
||||
kwargs['group'] = group
|
||||
|
||||
try:
|
||||
filename = func(base_name, base_dir, **kwargs)
|
||||
finally:
|
||||
if root_dir is not None:
|
||||
if logger is not None:
|
||||
logger.debug("changing back to '%s'", save_cwd)
|
||||
os.chdir(save_cwd)
|
||||
|
||||
return filename
|
||||
|
||||
|
||||
def get_unpack_formats():
|
||||
"""Returns a list of supported formats for unpacking.
|
||||
|
||||
Each element of the returned sequence is a tuple
|
||||
(name, extensions, description)
|
||||
"""
|
||||
formats = [(name, info[0], info[3]) for name, info in
|
||||
_UNPACK_FORMATS.items()]
|
||||
formats.sort()
|
||||
return formats
|
||||
|
||||
def _check_unpack_options(extensions, function, extra_args):
|
||||
"""Checks what gets registered as an unpacker."""
|
||||
# first make sure no other unpacker is registered for this extension
|
||||
existing_extensions = {}
|
||||
for name, info in _UNPACK_FORMATS.items():
|
||||
for ext in info[0]:
|
||||
existing_extensions[ext] = name
|
||||
|
||||
for extension in extensions:
|
||||
if extension in existing_extensions:
|
||||
msg = '%s is already registered for "%s"'
|
||||
raise RegistryError(msg % (extension,
|
||||
existing_extensions[extension]))
|
||||
|
||||
if not isinstance(function, Callable):
|
||||
raise TypeError('The registered function must be a callable')
|
||||
|
||||
|
||||
def register_unpack_format(name, extensions, function, extra_args=None,
|
||||
description=''):
|
||||
"""Registers an unpack format.
|
||||
|
||||
`name` is the name of the format. `extensions` is a list of extensions
|
||||
corresponding to the format.
|
||||
|
||||
`function` is the callable that will be
|
||||
used to unpack archives. The callable will receive archives to unpack.
|
||||
If it's unable to handle an archive, it needs to raise a ReadError
|
||||
exception.
|
||||
|
||||
If provided, `extra_args` is a sequence of
|
||||
(name, value) tuples that will be passed as arguments to the callable.
|
||||
description can be provided to describe the format, and will be returned
|
||||
by the get_unpack_formats() function.
|
||||
"""
|
||||
if extra_args is None:
|
||||
extra_args = []
|
||||
_check_unpack_options(extensions, function, extra_args)
|
||||
_UNPACK_FORMATS[name] = extensions, function, extra_args, description
|
||||
|
||||
def unregister_unpack_format(name):
|
||||
"""Removes the pack format from the registry."""
|
||||
del _UNPACK_FORMATS[name]
|
||||
|
||||
def _ensure_directory(path):
|
||||
"""Ensure that the parent directory of `path` exists"""
|
||||
dirname = os.path.dirname(path)
|
||||
if not os.path.isdir(dirname):
|
||||
os.makedirs(dirname)
|
||||
|
||||
def _unpack_zipfile(filename, extract_dir):
|
||||
"""Unpack zip `filename` to `extract_dir`
|
||||
"""
|
||||
try:
|
||||
import zipfile
|
||||
except ImportError:
|
||||
raise ReadError('zlib not supported, cannot unpack this archive.')
|
||||
|
||||
if not zipfile.is_zipfile(filename):
|
||||
raise ReadError("%s is not a zip file" % filename)
|
||||
|
||||
zip = zipfile.ZipFile(filename)
|
||||
try:
|
||||
for info in zip.infolist():
|
||||
name = info.filename
|
||||
|
||||
# don't extract absolute paths or ones with .. in them
|
||||
if name.startswith('/') or '..' in name:
|
||||
continue
|
||||
|
||||
target = os.path.join(extract_dir, *name.split('/'))
|
||||
if not target:
|
||||
continue
|
||||
|
||||
_ensure_directory(target)
|
||||
if not name.endswith('/'):
|
||||
# file
|
||||
data = zip.read(info.filename)
|
||||
f = open(target, 'wb')
|
||||
try:
|
||||
f.write(data)
|
||||
finally:
|
||||
f.close()
|
||||
del data
|
||||
finally:
|
||||
zip.close()
|
||||
|
||||
def _unpack_tarfile(filename, extract_dir):
|
||||
"""Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir`
|
||||
"""
|
||||
try:
|
||||
tarobj = tarfile.open(filename)
|
||||
except tarfile.TarError:
|
||||
raise ReadError(
|
||||
"%s is not a compressed or uncompressed tar file" % filename)
|
||||
try:
|
||||
tarobj.extractall(extract_dir)
|
||||
finally:
|
||||
tarobj.close()
|
||||
|
||||
_UNPACK_FORMATS = {
|
||||
'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"),
|
||||
'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"),
|
||||
'zip': (['.zip'], _unpack_zipfile, [], "ZIP file")
|
||||
}
|
||||
|
||||
if _BZ2_SUPPORTED:
|
||||
_UNPACK_FORMATS['bztar'] = (['.bz2'], _unpack_tarfile, [],
|
||||
"bzip2'ed tar-file")
|
||||
|
||||
def _find_unpack_format(filename):
|
||||
for name, info in _UNPACK_FORMATS.items():
|
||||
for extension in info[0]:
|
||||
if filename.endswith(extension):
|
||||
return name
|
||||
return None
|
||||
|
||||
def unpack_archive(filename, extract_dir=None, format=None):
|
||||
"""Unpack an archive.
|
||||
|
||||
`filename` is the name of the archive.
|
||||
|
||||
`extract_dir` is the name of the target directory, where the archive
|
||||
is unpacked. If not provided, the current working directory is used.
|
||||
|
||||
`format` is the archive format: one of "zip", "tar", or "gztar". Or any
|
||||
other registered format. If not provided, unpack_archive will use the
|
||||
filename extension and see if an unpacker was registered for that
|
||||
extension.
|
||||
|
||||
In case none is found, a ValueError is raised.
|
||||
"""
|
||||
if extract_dir is None:
|
||||
extract_dir = os.getcwd()
|
||||
|
||||
if format is not None:
|
||||
try:
|
||||
format_info = _UNPACK_FORMATS[format]
|
||||
except KeyError:
|
||||
raise ValueError("Unknown unpack format '{0}'".format(format))
|
||||
|
||||
func = format_info[1]
|
||||
func(filename, extract_dir, **dict(format_info[2]))
|
||||
else:
|
||||
# we need to look at the registered unpackers supported extensions
|
||||
format = _find_unpack_format(filename)
|
||||
if format is None:
|
||||
raise ReadError("Unknown archive format '{0}'".format(filename))
|
||||
|
||||
func = _UNPACK_FORMATS[format][1]
|
||||
kwargs = dict(_UNPACK_FORMATS[format][2])
|
||||
func(filename, extract_dir, **kwargs)
|
||||
@@ -0,0 +1,84 @@
|
||||
[posix_prefix]
|
||||
# Configuration directories. Some of these come straight out of the
|
||||
# configure script. They are for implementing the other variables, not to
|
||||
# be used directly in [resource_locations].
|
||||
confdir = /etc
|
||||
datadir = /usr/share
|
||||
libdir = /usr/lib
|
||||
statedir = /var
|
||||
# User resource directory
|
||||
local = ~/.local/{distribution.name}
|
||||
|
||||
stdlib = {base}/lib/python{py_version_short}
|
||||
platstdlib = {platbase}/lib/python{py_version_short}
|
||||
purelib = {base}/lib/python{py_version_short}/site-packages
|
||||
platlib = {platbase}/lib/python{py_version_short}/site-packages
|
||||
include = {base}/include/python{py_version_short}{abiflags}
|
||||
platinclude = {platbase}/include/python{py_version_short}{abiflags}
|
||||
data = {base}
|
||||
|
||||
[posix_home]
|
||||
stdlib = {base}/lib/python
|
||||
platstdlib = {base}/lib/python
|
||||
purelib = {base}/lib/python
|
||||
platlib = {base}/lib/python
|
||||
include = {base}/include/python
|
||||
platinclude = {base}/include/python
|
||||
scripts = {base}/bin
|
||||
data = {base}
|
||||
|
||||
[nt]
|
||||
stdlib = {base}/Lib
|
||||
platstdlib = {base}/Lib
|
||||
purelib = {base}/Lib/site-packages
|
||||
platlib = {base}/Lib/site-packages
|
||||
include = {base}/Include
|
||||
platinclude = {base}/Include
|
||||
scripts = {base}/Scripts
|
||||
data = {base}
|
||||
|
||||
[os2]
|
||||
stdlib = {base}/Lib
|
||||
platstdlib = {base}/Lib
|
||||
purelib = {base}/Lib/site-packages
|
||||
platlib = {base}/Lib/site-packages
|
||||
include = {base}/Include
|
||||
platinclude = {base}/Include
|
||||
scripts = {base}/Scripts
|
||||
data = {base}
|
||||
|
||||
[os2_home]
|
||||
stdlib = {userbase}/lib/python{py_version_short}
|
||||
platstdlib = {userbase}/lib/python{py_version_short}
|
||||
purelib = {userbase}/lib/python{py_version_short}/site-packages
|
||||
platlib = {userbase}/lib/python{py_version_short}/site-packages
|
||||
include = {userbase}/include/python{py_version_short}
|
||||
scripts = {userbase}/bin
|
||||
data = {userbase}
|
||||
|
||||
[nt_user]
|
||||
stdlib = {userbase}/Python{py_version_nodot}
|
||||
platstdlib = {userbase}/Python{py_version_nodot}
|
||||
purelib = {userbase}/Python{py_version_nodot}/site-packages
|
||||
platlib = {userbase}/Python{py_version_nodot}/site-packages
|
||||
include = {userbase}/Python{py_version_nodot}/Include
|
||||
scripts = {userbase}/Scripts
|
||||
data = {userbase}
|
||||
|
||||
[posix_user]
|
||||
stdlib = {userbase}/lib/python{py_version_short}
|
||||
platstdlib = {userbase}/lib/python{py_version_short}
|
||||
purelib = {userbase}/lib/python{py_version_short}/site-packages
|
||||
platlib = {userbase}/lib/python{py_version_short}/site-packages
|
||||
include = {userbase}/include/python{py_version_short}
|
||||
scripts = {userbase}/bin
|
||||
data = {userbase}
|
||||
|
||||
[osx_framework_user]
|
||||
stdlib = {userbase}/lib/python
|
||||
platstdlib = {userbase}/lib/python
|
||||
purelib = {userbase}/lib/python/site-packages
|
||||
platlib = {userbase}/lib/python/site-packages
|
||||
include = {userbase}/include
|
||||
scripts = {userbase}/bin
|
||||
data = {userbase}
|
||||
@@ -0,0 +1,786 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2012 The Python Software Foundation.
|
||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
||||
#
|
||||
"""Access to Python's configuration information."""
|
||||
|
||||
import codecs
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from os.path import pardir, realpath
|
||||
try:
|
||||
import configparser
|
||||
except ImportError:
|
||||
import ConfigParser as configparser
|
||||
|
||||
|
||||
__all__ = [
|
||||
'get_config_h_filename',
|
||||
'get_config_var',
|
||||
'get_config_vars',
|
||||
'get_makefile_filename',
|
||||
'get_path',
|
||||
'get_path_names',
|
||||
'get_paths',
|
||||
'get_platform',
|
||||
'get_python_version',
|
||||
'get_scheme_names',
|
||||
'parse_config_h',
|
||||
]
|
||||
|
||||
|
||||
def _safe_realpath(path):
|
||||
try:
|
||||
return realpath(path)
|
||||
except OSError:
|
||||
return path
|
||||
|
||||
|
||||
if sys.executable:
|
||||
_PROJECT_BASE = os.path.dirname(_safe_realpath(sys.executable))
|
||||
else:
|
||||
# sys.executable can be empty if argv[0] has been changed and Python is
|
||||
# unable to retrieve the real program name
|
||||
_PROJECT_BASE = _safe_realpath(os.getcwd())
|
||||
|
||||
if os.name == "nt" and "pcbuild" in _PROJECT_BASE[-8:].lower():
|
||||
_PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir))
|
||||
# PC/VS7.1
|
||||
if os.name == "nt" and "\\pc\\v" in _PROJECT_BASE[-10:].lower():
|
||||
_PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir))
|
||||
# PC/AMD64
|
||||
if os.name == "nt" and "\\pcbuild\\amd64" in _PROJECT_BASE[-14:].lower():
|
||||
_PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir))
|
||||
|
||||
|
||||
def is_python_build():
|
||||
for fn in ("Setup.dist", "Setup.local"):
|
||||
if os.path.isfile(os.path.join(_PROJECT_BASE, "Modules", fn)):
|
||||
return True
|
||||
return False
|
||||
|
||||
_PYTHON_BUILD = is_python_build()
|
||||
|
||||
_cfg_read = False
|
||||
|
||||
def _ensure_cfg_read():
|
||||
global _cfg_read
|
||||
if not _cfg_read:
|
||||
from ..resources import finder
|
||||
backport_package = __name__.rsplit('.', 1)[0]
|
||||
_finder = finder(backport_package)
|
||||
_cfgfile = _finder.find('sysconfig.cfg')
|
||||
assert _cfgfile, 'sysconfig.cfg exists'
|
||||
with _cfgfile.as_stream() as s:
|
||||
_SCHEMES.readfp(s)
|
||||
if _PYTHON_BUILD:
|
||||
for scheme in ('posix_prefix', 'posix_home'):
|
||||
_SCHEMES.set(scheme, 'include', '{srcdir}/Include')
|
||||
_SCHEMES.set(scheme, 'platinclude', '{projectbase}/.')
|
||||
|
||||
_cfg_read = True
|
||||
|
||||
|
||||
_SCHEMES = configparser.RawConfigParser()
|
||||
_VAR_REPL = re.compile(r'\{([^{]*?)\}')
|
||||
|
||||
def _expand_globals(config):
|
||||
_ensure_cfg_read()
|
||||
if config.has_section('globals'):
|
||||
globals = config.items('globals')
|
||||
else:
|
||||
globals = tuple()
|
||||
|
||||
sections = config.sections()
|
||||
for section in sections:
|
||||
if section == 'globals':
|
||||
continue
|
||||
for option, value in globals:
|
||||
if config.has_option(section, option):
|
||||
continue
|
||||
config.set(section, option, value)
|
||||
config.remove_section('globals')
|
||||
|
||||
# now expanding local variables defined in the cfg file
|
||||
#
|
||||
for section in config.sections():
|
||||
variables = dict(config.items(section))
|
||||
|
||||
def _replacer(matchobj):
|
||||
name = matchobj.group(1)
|
||||
if name in variables:
|
||||
return variables[name]
|
||||
return matchobj.group(0)
|
||||
|
||||
for option, value in config.items(section):
|
||||
config.set(section, option, _VAR_REPL.sub(_replacer, value))
|
||||
|
||||
#_expand_globals(_SCHEMES)
|
||||
|
||||
_PY_VERSION = '%s.%s.%s' % sys.version_info[:3]
|
||||
_PY_VERSION_SHORT = '%s.%s' % sys.version_info[:2]
|
||||
_PY_VERSION_SHORT_NO_DOT = '%s%s' % sys.version_info[:2]
|
||||
_PREFIX = os.path.normpath(sys.prefix)
|
||||
_EXEC_PREFIX = os.path.normpath(sys.exec_prefix)
|
||||
_CONFIG_VARS = None
|
||||
_USER_BASE = None
|
||||
|
||||
|
||||
def _subst_vars(path, local_vars):
|
||||
"""In the string `path`, replace tokens like {some.thing} with the
|
||||
corresponding value from the map `local_vars`.
|
||||
|
||||
If there is no corresponding value, leave the token unchanged.
|
||||
"""
|
||||
def _replacer(matchobj):
|
||||
name = matchobj.group(1)
|
||||
if name in local_vars:
|
||||
return local_vars[name]
|
||||
elif name in os.environ:
|
||||
return os.environ[name]
|
||||
return matchobj.group(0)
|
||||
return _VAR_REPL.sub(_replacer, path)
|
||||
|
||||
|
||||
def _extend_dict(target_dict, other_dict):
|
||||
target_keys = target_dict.keys()
|
||||
for key, value in other_dict.items():
|
||||
if key in target_keys:
|
||||
continue
|
||||
target_dict[key] = value
|
||||
|
||||
|
||||
def _expand_vars(scheme, vars):
|
||||
res = {}
|
||||
if vars is None:
|
||||
vars = {}
|
||||
_extend_dict(vars, get_config_vars())
|
||||
|
||||
for key, value in _SCHEMES.items(scheme):
|
||||
if os.name in ('posix', 'nt'):
|
||||
value = os.path.expanduser(value)
|
||||
res[key] = os.path.normpath(_subst_vars(value, vars))
|
||||
return res
|
||||
|
||||
|
||||
def format_value(value, vars):
|
||||
def _replacer(matchobj):
|
||||
name = matchobj.group(1)
|
||||
if name in vars:
|
||||
return vars[name]
|
||||
return matchobj.group(0)
|
||||
return _VAR_REPL.sub(_replacer, value)
|
||||
|
||||
|
||||
def _get_default_scheme():
|
||||
if os.name == 'posix':
|
||||
# the default scheme for posix is posix_prefix
|
||||
return 'posix_prefix'
|
||||
return os.name
|
||||
|
||||
|
||||
def _getuserbase():
|
||||
env_base = os.environ.get("PYTHONUSERBASE", None)
|
||||
|
||||
def joinuser(*args):
|
||||
return os.path.expanduser(os.path.join(*args))
|
||||
|
||||
# what about 'os2emx', 'riscos' ?
|
||||
if os.name == "nt":
|
||||
base = os.environ.get("APPDATA") or "~"
|
||||
if env_base:
|
||||
return env_base
|
||||
else:
|
||||
return joinuser(base, "Python")
|
||||
|
||||
if sys.platform == "darwin":
|
||||
framework = get_config_var("PYTHONFRAMEWORK")
|
||||
if framework:
|
||||
if env_base:
|
||||
return env_base
|
||||
else:
|
||||
return joinuser("~", "Library", framework, "%d.%d" %
|
||||
sys.version_info[:2])
|
||||
|
||||
if env_base:
|
||||
return env_base
|
||||
else:
|
||||
return joinuser("~", ".local")
|
||||
|
||||
|
||||
def _parse_makefile(filename, vars=None):
|
||||
"""Parse a Makefile-style file.
|
||||
|
||||
A dictionary containing name/value pairs is returned. If an
|
||||
optional dictionary is passed in as the second argument, it is
|
||||
used instead of a new dictionary.
|
||||
"""
|
||||
# Regexes needed for parsing Makefile (and similar syntaxes,
|
||||
# like old-style Setup files).
|
||||
_variable_rx = re.compile(r"([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)")
|
||||
_findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)")
|
||||
_findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}")
|
||||
|
||||
if vars is None:
|
||||
vars = {}
|
||||
done = {}
|
||||
notdone = {}
|
||||
|
||||
with codecs.open(filename, encoding='utf-8', errors="surrogateescape") as f:
|
||||
lines = f.readlines()
|
||||
|
||||
for line in lines:
|
||||
if line.startswith('#') or line.strip() == '':
|
||||
continue
|
||||
m = _variable_rx.match(line)
|
||||
if m:
|
||||
n, v = m.group(1, 2)
|
||||
v = v.strip()
|
||||
# `$$' is a literal `$' in make
|
||||
tmpv = v.replace('$$', '')
|
||||
|
||||
if "$" in tmpv:
|
||||
notdone[n] = v
|
||||
else:
|
||||
try:
|
||||
v = int(v)
|
||||
except ValueError:
|
||||
# insert literal `$'
|
||||
done[n] = v.replace('$$', '$')
|
||||
else:
|
||||
done[n] = v
|
||||
|
||||
# do variable interpolation here
|
||||
variables = list(notdone.keys())
|
||||
|
||||
# Variables with a 'PY_' prefix in the makefile. These need to
|
||||
# be made available without that prefix through sysconfig.
|
||||
# Special care is needed to ensure that variable expansion works, even
|
||||
# if the expansion uses the name without a prefix.
|
||||
renamed_variables = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS')
|
||||
|
||||
while len(variables) > 0:
|
||||
for name in tuple(variables):
|
||||
value = notdone[name]
|
||||
m = _findvar1_rx.search(value) or _findvar2_rx.search(value)
|
||||
if m is not None:
|
||||
n = m.group(1)
|
||||
found = True
|
||||
if n in done:
|
||||
item = str(done[n])
|
||||
elif n in notdone:
|
||||
# get it on a subsequent round
|
||||
found = False
|
||||
elif n in os.environ:
|
||||
# do it like make: fall back to environment
|
||||
item = os.environ[n]
|
||||
|
||||
elif n in renamed_variables:
|
||||
if (name.startswith('PY_') and
|
||||
name[3:] in renamed_variables):
|
||||
item = ""
|
||||
|
||||
elif 'PY_' + n in notdone:
|
||||
found = False
|
||||
|
||||
else:
|
||||
item = str(done['PY_' + n])
|
||||
|
||||
else:
|
||||
done[n] = item = ""
|
||||
|
||||
if found:
|
||||
after = value[m.end():]
|
||||
value = value[:m.start()] + item + after
|
||||
if "$" in after:
|
||||
notdone[name] = value
|
||||
else:
|
||||
try:
|
||||
value = int(value)
|
||||
except ValueError:
|
||||
done[name] = value.strip()
|
||||
else:
|
||||
done[name] = value
|
||||
variables.remove(name)
|
||||
|
||||
if (name.startswith('PY_') and
|
||||
name[3:] in renamed_variables):
|
||||
|
||||
name = name[3:]
|
||||
if name not in done:
|
||||
done[name] = value
|
||||
|
||||
else:
|
||||
# bogus variable reference (e.g. "prefix=$/opt/python");
|
||||
# just drop it since we can't deal
|
||||
done[name] = value
|
||||
variables.remove(name)
|
||||
|
||||
# strip spurious spaces
|
||||
for k, v in done.items():
|
||||
if isinstance(v, str):
|
||||
done[k] = v.strip()
|
||||
|
||||
# save the results in the global dictionary
|
||||
vars.update(done)
|
||||
return vars
|
||||
|
||||
|
||||
def get_makefile_filename():
|
||||
"""Return the path of the Makefile."""
|
||||
if _PYTHON_BUILD:
|
||||
return os.path.join(_PROJECT_BASE, "Makefile")
|
||||
if hasattr(sys, 'abiflags'):
|
||||
config_dir_name = 'config-%s%s' % (_PY_VERSION_SHORT, sys.abiflags)
|
||||
else:
|
||||
config_dir_name = 'config'
|
||||
return os.path.join(get_path('stdlib'), config_dir_name, 'Makefile')
|
||||
|
||||
|
||||
def _init_posix(vars):
|
||||
"""Initialize the module as appropriate for POSIX systems."""
|
||||
# load the installed Makefile:
|
||||
makefile = get_makefile_filename()
|
||||
try:
|
||||
_parse_makefile(makefile, vars)
|
||||
except IOError as e:
|
||||
msg = "invalid Python installation: unable to open %s" % makefile
|
||||
if hasattr(e, "strerror"):
|
||||
msg = msg + " (%s)" % e.strerror
|
||||
raise IOError(msg)
|
||||
# load the installed pyconfig.h:
|
||||
config_h = get_config_h_filename()
|
||||
try:
|
||||
with open(config_h) as f:
|
||||
parse_config_h(f, vars)
|
||||
except IOError as e:
|
||||
msg = "invalid Python installation: unable to open %s" % config_h
|
||||
if hasattr(e, "strerror"):
|
||||
msg = msg + " (%s)" % e.strerror
|
||||
raise IOError(msg)
|
||||
# On AIX, there are wrong paths to the linker scripts in the Makefile
|
||||
# -- these paths are relative to the Python source, but when installed
|
||||
# the scripts are in another directory.
|
||||
if _PYTHON_BUILD:
|
||||
vars['LDSHARED'] = vars['BLDSHARED']
|
||||
|
||||
|
||||
def _init_non_posix(vars):
|
||||
"""Initialize the module as appropriate for NT"""
|
||||
# set basic install directories
|
||||
vars['LIBDEST'] = get_path('stdlib')
|
||||
vars['BINLIBDEST'] = get_path('platstdlib')
|
||||
vars['INCLUDEPY'] = get_path('include')
|
||||
vars['SO'] = '.pyd'
|
||||
vars['EXE'] = '.exe'
|
||||
vars['VERSION'] = _PY_VERSION_SHORT_NO_DOT
|
||||
vars['BINDIR'] = os.path.dirname(_safe_realpath(sys.executable))
|
||||
|
||||
#
|
||||
# public APIs
|
||||
#
|
||||
|
||||
|
||||
def parse_config_h(fp, vars=None):
|
||||
"""Parse a config.h-style file.
|
||||
|
||||
A dictionary containing name/value pairs is returned. If an
|
||||
optional dictionary is passed in as the second argument, it is
|
||||
used instead of a new dictionary.
|
||||
"""
|
||||
if vars is None:
|
||||
vars = {}
|
||||
define_rx = re.compile("#define ([A-Z][A-Za-z0-9_]+) (.*)\n")
|
||||
undef_rx = re.compile("/[*] #undef ([A-Z][A-Za-z0-9_]+) [*]/\n")
|
||||
|
||||
while True:
|
||||
line = fp.readline()
|
||||
if not line:
|
||||
break
|
||||
m = define_rx.match(line)
|
||||
if m:
|
||||
n, v = m.group(1, 2)
|
||||
try:
|
||||
v = int(v)
|
||||
except ValueError:
|
||||
pass
|
||||
vars[n] = v
|
||||
else:
|
||||
m = undef_rx.match(line)
|
||||
if m:
|
||||
vars[m.group(1)] = 0
|
||||
return vars
|
||||
|
||||
|
||||
def get_config_h_filename():
|
||||
"""Return the path of pyconfig.h."""
|
||||
if _PYTHON_BUILD:
|
||||
if os.name == "nt":
|
||||
inc_dir = os.path.join(_PROJECT_BASE, "PC")
|
||||
else:
|
||||
inc_dir = _PROJECT_BASE
|
||||
else:
|
||||
inc_dir = get_path('platinclude')
|
||||
return os.path.join(inc_dir, 'pyconfig.h')
|
||||
|
||||
|
||||
def get_scheme_names():
|
||||
"""Return a tuple containing the schemes names."""
|
||||
return tuple(sorted(_SCHEMES.sections()))
|
||||
|
||||
|
||||
def get_path_names():
|
||||
"""Return a tuple containing the paths names."""
|
||||
# xxx see if we want a static list
|
||||
return _SCHEMES.options('posix_prefix')
|
||||
|
||||
|
||||
def get_paths(scheme=_get_default_scheme(), vars=None, expand=True):
|
||||
"""Return a mapping containing an install scheme.
|
||||
|
||||
``scheme`` is the install scheme name. If not provided, it will
|
||||
return the default scheme for the current platform.
|
||||
"""
|
||||
_ensure_cfg_read()
|
||||
if expand:
|
||||
return _expand_vars(scheme, vars)
|
||||
else:
|
||||
return dict(_SCHEMES.items(scheme))
|
||||
|
||||
|
||||
def get_path(name, scheme=_get_default_scheme(), vars=None, expand=True):
|
||||
"""Return a path corresponding to the scheme.
|
||||
|
||||
``scheme`` is the install scheme name.
|
||||
"""
|
||||
return get_paths(scheme, vars, expand)[name]
|
||||
|
||||
|
||||
def get_config_vars(*args):
|
||||
"""With no arguments, return a dictionary of all configuration
|
||||
variables relevant for the current platform.
|
||||
|
||||
On Unix, this means every variable defined in Python's installed Makefile;
|
||||
On Windows and Mac OS it's a much smaller set.
|
||||
|
||||
With arguments, return a list of values that result from looking up
|
||||
each argument in the configuration variable dictionary.
|
||||
"""
|
||||
global _CONFIG_VARS
|
||||
if _CONFIG_VARS is None:
|
||||
_CONFIG_VARS = {}
|
||||
# Normalized versions of prefix and exec_prefix are handy to have;
|
||||
# in fact, these are the standard versions used most places in the
|
||||
# distutils2 module.
|
||||
_CONFIG_VARS['prefix'] = _PREFIX
|
||||
_CONFIG_VARS['exec_prefix'] = _EXEC_PREFIX
|
||||
_CONFIG_VARS['py_version'] = _PY_VERSION
|
||||
_CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT
|
||||
_CONFIG_VARS['py_version_nodot'] = _PY_VERSION[0] + _PY_VERSION[2]
|
||||
_CONFIG_VARS['base'] = _PREFIX
|
||||
_CONFIG_VARS['platbase'] = _EXEC_PREFIX
|
||||
_CONFIG_VARS['projectbase'] = _PROJECT_BASE
|
||||
try:
|
||||
_CONFIG_VARS['abiflags'] = sys.abiflags
|
||||
except AttributeError:
|
||||
# sys.abiflags may not be defined on all platforms.
|
||||
_CONFIG_VARS['abiflags'] = ''
|
||||
|
||||
if os.name in ('nt', 'os2'):
|
||||
_init_non_posix(_CONFIG_VARS)
|
||||
if os.name == 'posix':
|
||||
_init_posix(_CONFIG_VARS)
|
||||
# Setting 'userbase' is done below the call to the
|
||||
# init function to enable using 'get_config_var' in
|
||||
# the init-function.
|
||||
if sys.version >= '2.6':
|
||||
_CONFIG_VARS['userbase'] = _getuserbase()
|
||||
|
||||
if 'srcdir' not in _CONFIG_VARS:
|
||||
_CONFIG_VARS['srcdir'] = _PROJECT_BASE
|
||||
else:
|
||||
_CONFIG_VARS['srcdir'] = _safe_realpath(_CONFIG_VARS['srcdir'])
|
||||
|
||||
# Convert srcdir into an absolute path if it appears necessary.
|
||||
# Normally it is relative to the build directory. However, during
|
||||
# testing, for example, we might be running a non-installed python
|
||||
# from a different directory.
|
||||
if _PYTHON_BUILD and os.name == "posix":
|
||||
base = _PROJECT_BASE
|
||||
try:
|
||||
cwd = os.getcwd()
|
||||
except OSError:
|
||||
cwd = None
|
||||
if (not os.path.isabs(_CONFIG_VARS['srcdir']) and
|
||||
base != cwd):
|
||||
# srcdir is relative and we are not in the same directory
|
||||
# as the executable. Assume executable is in the build
|
||||
# directory and make srcdir absolute.
|
||||
srcdir = os.path.join(base, _CONFIG_VARS['srcdir'])
|
||||
_CONFIG_VARS['srcdir'] = os.path.normpath(srcdir)
|
||||
|
||||
if sys.platform == 'darwin':
|
||||
kernel_version = os.uname()[2] # Kernel version (8.4.3)
|
||||
major_version = int(kernel_version.split('.')[0])
|
||||
|
||||
if major_version < 8:
|
||||
# On Mac OS X before 10.4, check if -arch and -isysroot
|
||||
# are in CFLAGS or LDFLAGS and remove them if they are.
|
||||
# This is needed when building extensions on a 10.3 system
|
||||
# using a universal build of python.
|
||||
for key in ('LDFLAGS', 'BASECFLAGS',
|
||||
# a number of derived variables. These need to be
|
||||
# patched up as well.
|
||||
'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
|
||||
flags = _CONFIG_VARS[key]
|
||||
flags = re.sub(r'-arch\s+\w+\s', ' ', flags)
|
||||
flags = re.sub('-isysroot [^ \t]*', ' ', flags)
|
||||
_CONFIG_VARS[key] = flags
|
||||
else:
|
||||
# Allow the user to override the architecture flags using
|
||||
# an environment variable.
|
||||
# NOTE: This name was introduced by Apple in OSX 10.5 and
|
||||
# is used by several scripting languages distributed with
|
||||
# that OS release.
|
||||
if 'ARCHFLAGS' in os.environ:
|
||||
arch = os.environ['ARCHFLAGS']
|
||||
for key in ('LDFLAGS', 'BASECFLAGS',
|
||||
# a number of derived variables. These need to be
|
||||
# patched up as well.
|
||||
'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
|
||||
|
||||
flags = _CONFIG_VARS[key]
|
||||
flags = re.sub(r'-arch\s+\w+\s', ' ', flags)
|
||||
flags = flags + ' ' + arch
|
||||
_CONFIG_VARS[key] = flags
|
||||
|
||||
# If we're on OSX 10.5 or later and the user tries to
|
||||
# compiles an extension using an SDK that is not present
|
||||
# on the current machine it is better to not use an SDK
|
||||
# than to fail.
|
||||
#
|
||||
# The major usecase for this is users using a Python.org
|
||||
# binary installer on OSX 10.6: that installer uses
|
||||
# the 10.4u SDK, but that SDK is not installed by default
|
||||
# when you install Xcode.
|
||||
#
|
||||
CFLAGS = _CONFIG_VARS.get('CFLAGS', '')
|
||||
m = re.search(r'-isysroot\s+(\S+)', CFLAGS)
|
||||
if m is not None:
|
||||
sdk = m.group(1)
|
||||
if not os.path.exists(sdk):
|
||||
for key in ('LDFLAGS', 'BASECFLAGS',
|
||||
# a number of derived variables. These need to be
|
||||
# patched up as well.
|
||||
'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'):
|
||||
|
||||
flags = _CONFIG_VARS[key]
|
||||
flags = re.sub(r'-isysroot\s+\S+(\s|$)', ' ', flags)
|
||||
_CONFIG_VARS[key] = flags
|
||||
|
||||
if args:
|
||||
vals = []
|
||||
for name in args:
|
||||
vals.append(_CONFIG_VARS.get(name))
|
||||
return vals
|
||||
else:
|
||||
return _CONFIG_VARS
|
||||
|
||||
|
||||
def get_config_var(name):
|
||||
"""Return the value of a single variable using the dictionary returned by
|
||||
'get_config_vars()'.
|
||||
|
||||
Equivalent to get_config_vars().get(name)
|
||||
"""
|
||||
return get_config_vars().get(name)
|
||||
|
||||
|
||||
def get_platform():
|
||||
"""Return a string that identifies the current platform.
|
||||
|
||||
This is used mainly to distinguish platform-specific build directories and
|
||||
platform-specific built distributions. Typically includes the OS name
|
||||
and version and the architecture (as supplied by 'os.uname()'),
|
||||
although the exact information included depends on the OS; eg. for IRIX
|
||||
the architecture isn't particularly important (IRIX only runs on SGI
|
||||
hardware), but for Linux the kernel version isn't particularly
|
||||
important.
|
||||
|
||||
Examples of returned values:
|
||||
linux-i586
|
||||
linux-alpha (?)
|
||||
solaris-2.6-sun4u
|
||||
irix-5.3
|
||||
irix64-6.2
|
||||
|
||||
Windows will return one of:
|
||||
win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
|
||||
win-ia64 (64bit Windows on Itanium)
|
||||
win32 (all others - specifically, sys.platform is returned)
|
||||
|
||||
For other non-POSIX platforms, currently just returns 'sys.platform'.
|
||||
"""
|
||||
if os.name == 'nt':
|
||||
# sniff sys.version for architecture.
|
||||
prefix = " bit ("
|
||||
i = sys.version.find(prefix)
|
||||
if i == -1:
|
||||
return sys.platform
|
||||
j = sys.version.find(")", i)
|
||||
look = sys.version[i+len(prefix):j].lower()
|
||||
if look == 'amd64':
|
||||
return 'win-amd64'
|
||||
if look == 'itanium':
|
||||
return 'win-ia64'
|
||||
return sys.platform
|
||||
|
||||
if os.name != "posix" or not hasattr(os, 'uname'):
|
||||
# XXX what about the architecture? NT is Intel or Alpha,
|
||||
# Mac OS is M68k or PPC, etc.
|
||||
return sys.platform
|
||||
|
||||
# Try to distinguish various flavours of Unix
|
||||
osname, host, release, version, machine = os.uname()
|
||||
|
||||
# Convert the OS name to lowercase, remove '/' characters
|
||||
# (to accommodate BSD/OS), and translate spaces (for "Power Macintosh")
|
||||
osname = osname.lower().replace('/', '')
|
||||
machine = machine.replace(' ', '_')
|
||||
machine = machine.replace('/', '-')
|
||||
|
||||
if osname[:5] == "linux":
|
||||
# At least on Linux/Intel, 'machine' is the processor --
|
||||
# i386, etc.
|
||||
# XXX what about Alpha, SPARC, etc?
|
||||
return "%s-%s" % (osname, machine)
|
||||
elif osname[:5] == "sunos":
|
||||
if release[0] >= "5": # SunOS 5 == Solaris 2
|
||||
osname = "solaris"
|
||||
release = "%d.%s" % (int(release[0]) - 3, release[2:])
|
||||
# fall through to standard osname-release-machine representation
|
||||
elif osname[:4] == "irix": # could be "irix64"!
|
||||
return "%s-%s" % (osname, release)
|
||||
elif osname[:3] == "aix":
|
||||
return "%s-%s.%s" % (osname, version, release)
|
||||
elif osname[:6] == "cygwin":
|
||||
osname = "cygwin"
|
||||
rel_re = re.compile(r'[\d.]+')
|
||||
m = rel_re.match(release)
|
||||
if m:
|
||||
release = m.group()
|
||||
elif osname[:6] == "darwin":
|
||||
#
|
||||
# For our purposes, we'll assume that the system version from
|
||||
# distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set
|
||||
# to. This makes the compatibility story a bit more sane because the
|
||||
# machine is going to compile and link as if it were
|
||||
# MACOSX_DEPLOYMENT_TARGET.
|
||||
cfgvars = get_config_vars()
|
||||
macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET')
|
||||
|
||||
if True:
|
||||
# Always calculate the release of the running machine,
|
||||
# needed to determine if we can build fat binaries or not.
|
||||
|
||||
macrelease = macver
|
||||
# Get the system version. Reading this plist is a documented
|
||||
# way to get the system version (see the documentation for
|
||||
# the Gestalt Manager)
|
||||
try:
|
||||
f = open('/System/Library/CoreServices/SystemVersion.plist')
|
||||
except IOError:
|
||||
# We're on a plain darwin box, fall back to the default
|
||||
# behaviour.
|
||||
pass
|
||||
else:
|
||||
try:
|
||||
m = re.search(r'<key>ProductUserVisibleVersion</key>\s*'
|
||||
r'<string>(.*?)</string>', f.read())
|
||||
finally:
|
||||
f.close()
|
||||
if m is not None:
|
||||
macrelease = '.'.join(m.group(1).split('.')[:2])
|
||||
# else: fall back to the default behaviour
|
||||
|
||||
if not macver:
|
||||
macver = macrelease
|
||||
|
||||
if macver:
|
||||
release = macver
|
||||
osname = "macosx"
|
||||
|
||||
if ((macrelease + '.') >= '10.4.' and
|
||||
'-arch' in get_config_vars().get('CFLAGS', '').strip()):
|
||||
# The universal build will build fat binaries, but not on
|
||||
# systems before 10.4
|
||||
#
|
||||
# Try to detect 4-way universal builds, those have machine-type
|
||||
# 'universal' instead of 'fat'.
|
||||
|
||||
machine = 'fat'
|
||||
cflags = get_config_vars().get('CFLAGS')
|
||||
|
||||
archs = re.findall(r'-arch\s+(\S+)', cflags)
|
||||
archs = tuple(sorted(set(archs)))
|
||||
|
||||
if len(archs) == 1:
|
||||
machine = archs[0]
|
||||
elif archs == ('i386', 'ppc'):
|
||||
machine = 'fat'
|
||||
elif archs == ('i386', 'x86_64'):
|
||||
machine = 'intel'
|
||||
elif archs == ('i386', 'ppc', 'x86_64'):
|
||||
machine = 'fat3'
|
||||
elif archs == ('ppc64', 'x86_64'):
|
||||
machine = 'fat64'
|
||||
elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'):
|
||||
machine = 'universal'
|
||||
else:
|
||||
raise ValueError(
|
||||
"Don't know machine value for archs=%r" % (archs,))
|
||||
|
||||
elif machine == 'i386':
|
||||
# On OSX the machine type returned by uname is always the
|
||||
# 32-bit variant, even if the executable architecture is
|
||||
# the 64-bit variant
|
||||
if sys.maxsize >= 2**32:
|
||||
machine = 'x86_64'
|
||||
|
||||
elif machine in ('PowerPC', 'Power_Macintosh'):
|
||||
# Pick a sane name for the PPC architecture.
|
||||
# See 'i386' case
|
||||
if sys.maxsize >= 2**32:
|
||||
machine = 'ppc64'
|
||||
else:
|
||||
machine = 'ppc'
|
||||
|
||||
return "%s-%s-%s" % (osname, release, machine)
|
||||
|
||||
|
||||
def get_python_version():
|
||||
return _PY_VERSION_SHORT
|
||||
|
||||
|
||||
def _print_dict(title, data):
|
||||
for index, (key, value) in enumerate(sorted(data.items())):
|
||||
if index == 0:
|
||||
print('%s: ' % (title))
|
||||
print('\t%s = "%s"' % (key, value))
|
||||
|
||||
|
||||
def _main():
|
||||
"""Display all information sysconfig detains."""
|
||||
print('Platform: "%s"' % get_platform())
|
||||
print('Python version: "%s"' % get_python_version())
|
||||
print('Current installation scheme: "%s"' % _get_default_scheme())
|
||||
print()
|
||||
_print_dict('Paths', get_paths())
|
||||
print()
|
||||
_print_dict('Variables', get_config_vars())
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
_main()
|
||||
2607
venv/Lib/site-packages/pip/_vendor/distlib/_backport/tarfile.py
Normal file
2607
venv/Lib/site-packages/pip/_vendor/distlib/_backport/tarfile.py
Normal file
File diff suppressed because it is too large
Load Diff
1120
venv/Lib/site-packages/pip/_vendor/distlib/compat.py
Normal file
1120
venv/Lib/site-packages/pip/_vendor/distlib/compat.py
Normal file
File diff suppressed because it is too large
Load Diff
1339
venv/Lib/site-packages/pip/_vendor/distlib/database.py
Normal file
1339
venv/Lib/site-packages/pip/_vendor/distlib/database.py
Normal file
File diff suppressed because it is too large
Load Diff
509
venv/Lib/site-packages/pip/_vendor/distlib/index.py
Normal file
509
venv/Lib/site-packages/pip/_vendor/distlib/index.py
Normal file
@@ -0,0 +1,509 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2013 Vinay Sajip.
|
||||
# Licensed to the Python Software Foundation under a contributor agreement.
|
||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
||||
#
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
try:
|
||||
from threading import Thread
|
||||
except ImportError:
|
||||
from dummy_threading import Thread
|
||||
|
||||
from . import DistlibException
|
||||
from .compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr,
|
||||
urlparse, build_opener, string_types)
|
||||
from .util import zip_dir, ServerProxy
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_INDEX = 'https://pypi.org/pypi'
|
||||
DEFAULT_REALM = 'pypi'
|
||||
|
||||
class PackageIndex(object):
|
||||
"""
|
||||
This class represents a package index compatible with PyPI, the Python
|
||||
Package Index.
|
||||
"""
|
||||
|
||||
boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$'
|
||||
|
||||
def __init__(self, url=None):
|
||||
"""
|
||||
Initialise an instance.
|
||||
|
||||
:param url: The URL of the index. If not specified, the URL for PyPI is
|
||||
used.
|
||||
"""
|
||||
self.url = url or DEFAULT_INDEX
|
||||
self.read_configuration()
|
||||
scheme, netloc, path, params, query, frag = urlparse(self.url)
|
||||
if params or query or frag or scheme not in ('http', 'https'):
|
||||
raise DistlibException('invalid repository: %s' % self.url)
|
||||
self.password_handler = None
|
||||
self.ssl_verifier = None
|
||||
self.gpg = None
|
||||
self.gpg_home = None
|
||||
with open(os.devnull, 'w') as sink:
|
||||
# Use gpg by default rather than gpg2, as gpg2 insists on
|
||||
# prompting for passwords
|
||||
for s in ('gpg', 'gpg2'):
|
||||
try:
|
||||
rc = subprocess.check_call([s, '--version'], stdout=sink,
|
||||
stderr=sink)
|
||||
if rc == 0:
|
||||
self.gpg = s
|
||||
break
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def _get_pypirc_command(self):
|
||||
"""
|
||||
Get the distutils command for interacting with PyPI configurations.
|
||||
:return: the command.
|
||||
"""
|
||||
from .util import _get_pypirc_command as cmd
|
||||
return cmd()
|
||||
|
||||
def read_configuration(self):
|
||||
"""
|
||||
Read the PyPI access configuration as supported by distutils. This populates
|
||||
``username``, ``password``, ``realm`` and ``url`` attributes from the
|
||||
configuration.
|
||||
"""
|
||||
from .util import _load_pypirc
|
||||
cfg = _load_pypirc(self)
|
||||
self.username = cfg.get('username')
|
||||
self.password = cfg.get('password')
|
||||
self.realm = cfg.get('realm', 'pypi')
|
||||
self.url = cfg.get('repository', self.url)
|
||||
|
||||
def save_configuration(self):
|
||||
"""
|
||||
Save the PyPI access configuration. You must have set ``username`` and
|
||||
``password`` attributes before calling this method.
|
||||
"""
|
||||
self.check_credentials()
|
||||
from .util import _store_pypirc
|
||||
_store_pypirc(self)
|
||||
|
||||
def check_credentials(self):
|
||||
"""
|
||||
Check that ``username`` and ``password`` have been set, and raise an
|
||||
exception if not.
|
||||
"""
|
||||
if self.username is None or self.password is None:
|
||||
raise DistlibException('username and password must be set')
|
||||
pm = HTTPPasswordMgr()
|
||||
_, netloc, _, _, _, _ = urlparse(self.url)
|
||||
pm.add_password(self.realm, netloc, self.username, self.password)
|
||||
self.password_handler = HTTPBasicAuthHandler(pm)
|
||||
|
||||
def register(self, metadata):
|
||||
"""
|
||||
Register a distribution on PyPI, using the provided metadata.
|
||||
|
||||
:param metadata: A :class:`Metadata` instance defining at least a name
|
||||
and version number for the distribution to be
|
||||
registered.
|
||||
:return: The HTTP response received from PyPI upon submission of the
|
||||
request.
|
||||
"""
|
||||
self.check_credentials()
|
||||
metadata.validate()
|
||||
d = metadata.todict()
|
||||
d[':action'] = 'verify'
|
||||
request = self.encode_request(d.items(), [])
|
||||
response = self.send_request(request)
|
||||
d[':action'] = 'submit'
|
||||
request = self.encode_request(d.items(), [])
|
||||
return self.send_request(request)
|
||||
|
||||
def _reader(self, name, stream, outbuf):
|
||||
"""
|
||||
Thread runner for reading lines of from a subprocess into a buffer.
|
||||
|
||||
:param name: The logical name of the stream (used for logging only).
|
||||
:param stream: The stream to read from. This will typically a pipe
|
||||
connected to the output stream of a subprocess.
|
||||
:param outbuf: The list to append the read lines to.
|
||||
"""
|
||||
while True:
|
||||
s = stream.readline()
|
||||
if not s:
|
||||
break
|
||||
s = s.decode('utf-8').rstrip()
|
||||
outbuf.append(s)
|
||||
logger.debug('%s: %s' % (name, s))
|
||||
stream.close()
|
||||
|
||||
def get_sign_command(self, filename, signer, sign_password,
|
||||
keystore=None):
|
||||
"""
|
||||
Return a suitable command for signing a file.
|
||||
|
||||
:param filename: The pathname to the file to be signed.
|
||||
:param signer: The identifier of the signer of the file.
|
||||
:param sign_password: The passphrase for the signer's
|
||||
private key used for signing.
|
||||
:param keystore: The path to a directory which contains the keys
|
||||
used in verification. If not specified, the
|
||||
instance's ``gpg_home`` attribute is used instead.
|
||||
:return: The signing command as a list suitable to be
|
||||
passed to :class:`subprocess.Popen`.
|
||||
"""
|
||||
cmd = [self.gpg, '--status-fd', '2', '--no-tty']
|
||||
if keystore is None:
|
||||
keystore = self.gpg_home
|
||||
if keystore:
|
||||
cmd.extend(['--homedir', keystore])
|
||||
if sign_password is not None:
|
||||
cmd.extend(['--batch', '--passphrase-fd', '0'])
|
||||
td = tempfile.mkdtemp()
|
||||
sf = os.path.join(td, os.path.basename(filename) + '.asc')
|
||||
cmd.extend(['--detach-sign', '--armor', '--local-user',
|
||||
signer, '--output', sf, filename])
|
||||
logger.debug('invoking: %s', ' '.join(cmd))
|
||||
return cmd, sf
|
||||
|
||||
def run_command(self, cmd, input_data=None):
|
||||
"""
|
||||
Run a command in a child process , passing it any input data specified.
|
||||
|
||||
:param cmd: The command to run.
|
||||
:param input_data: If specified, this must be a byte string containing
|
||||
data to be sent to the child process.
|
||||
:return: A tuple consisting of the subprocess' exit code, a list of
|
||||
lines read from the subprocess' ``stdout``, and a list of
|
||||
lines read from the subprocess' ``stderr``.
|
||||
"""
|
||||
kwargs = {
|
||||
'stdout': subprocess.PIPE,
|
||||
'stderr': subprocess.PIPE,
|
||||
}
|
||||
if input_data is not None:
|
||||
kwargs['stdin'] = subprocess.PIPE
|
||||
stdout = []
|
||||
stderr = []
|
||||
p = subprocess.Popen(cmd, **kwargs)
|
||||
# We don't use communicate() here because we may need to
|
||||
# get clever with interacting with the command
|
||||
t1 = Thread(target=self._reader, args=('stdout', p.stdout, stdout))
|
||||
t1.start()
|
||||
t2 = Thread(target=self._reader, args=('stderr', p.stderr, stderr))
|
||||
t2.start()
|
||||
if input_data is not None:
|
||||
p.stdin.write(input_data)
|
||||
p.stdin.close()
|
||||
|
||||
p.wait()
|
||||
t1.join()
|
||||
t2.join()
|
||||
return p.returncode, stdout, stderr
|
||||
|
||||
def sign_file(self, filename, signer, sign_password, keystore=None):
|
||||
"""
|
||||
Sign a file.
|
||||
|
||||
:param filename: The pathname to the file to be signed.
|
||||
:param signer: The identifier of the signer of the file.
|
||||
:param sign_password: The passphrase for the signer's
|
||||
private key used for signing.
|
||||
:param keystore: The path to a directory which contains the keys
|
||||
used in signing. If not specified, the instance's
|
||||
``gpg_home`` attribute is used instead.
|
||||
:return: The absolute pathname of the file where the signature is
|
||||
stored.
|
||||
"""
|
||||
cmd, sig_file = self.get_sign_command(filename, signer, sign_password,
|
||||
keystore)
|
||||
rc, stdout, stderr = self.run_command(cmd,
|
||||
sign_password.encode('utf-8'))
|
||||
if rc != 0:
|
||||
raise DistlibException('sign command failed with error '
|
||||
'code %s' % rc)
|
||||
return sig_file
|
||||
|
||||
def upload_file(self, metadata, filename, signer=None, sign_password=None,
|
||||
filetype='sdist', pyversion='source', keystore=None):
|
||||
"""
|
||||
Upload a release file to the index.
|
||||
|
||||
:param metadata: A :class:`Metadata` instance defining at least a name
|
||||
and version number for the file to be uploaded.
|
||||
:param filename: The pathname of the file to be uploaded.
|
||||
:param signer: The identifier of the signer of the file.
|
||||
:param sign_password: The passphrase for the signer's
|
||||
private key used for signing.
|
||||
:param filetype: The type of the file being uploaded. This is the
|
||||
distutils command which produced that file, e.g.
|
||||
``sdist`` or ``bdist_wheel``.
|
||||
:param pyversion: The version of Python which the release relates
|
||||
to. For code compatible with any Python, this would
|
||||
be ``source``, otherwise it would be e.g. ``3.2``.
|
||||
:param keystore: The path to a directory which contains the keys
|
||||
used in signing. If not specified, the instance's
|
||||
``gpg_home`` attribute is used instead.
|
||||
:return: The HTTP response received from PyPI upon submission of the
|
||||
request.
|
||||
"""
|
||||
self.check_credentials()
|
||||
if not os.path.exists(filename):
|
||||
raise DistlibException('not found: %s' % filename)
|
||||
metadata.validate()
|
||||
d = metadata.todict()
|
||||
sig_file = None
|
||||
if signer:
|
||||
if not self.gpg:
|
||||
logger.warning('no signing program available - not signed')
|
||||
else:
|
||||
sig_file = self.sign_file(filename, signer, sign_password,
|
||||
keystore)
|
||||
with open(filename, 'rb') as f:
|
||||
file_data = f.read()
|
||||
md5_digest = hashlib.md5(file_data).hexdigest()
|
||||
sha256_digest = hashlib.sha256(file_data).hexdigest()
|
||||
d.update({
|
||||
':action': 'file_upload',
|
||||
'protocol_version': '1',
|
||||
'filetype': filetype,
|
||||
'pyversion': pyversion,
|
||||
'md5_digest': md5_digest,
|
||||
'sha256_digest': sha256_digest,
|
||||
})
|
||||
files = [('content', os.path.basename(filename), file_data)]
|
||||
if sig_file:
|
||||
with open(sig_file, 'rb') as f:
|
||||
sig_data = f.read()
|
||||
files.append(('gpg_signature', os.path.basename(sig_file),
|
||||
sig_data))
|
||||
shutil.rmtree(os.path.dirname(sig_file))
|
||||
request = self.encode_request(d.items(), files)
|
||||
return self.send_request(request)
|
||||
|
||||
def upload_documentation(self, metadata, doc_dir):
|
||||
"""
|
||||
Upload documentation to the index.
|
||||
|
||||
:param metadata: A :class:`Metadata` instance defining at least a name
|
||||
and version number for the documentation to be
|
||||
uploaded.
|
||||
:param doc_dir: The pathname of the directory which contains the
|
||||
documentation. This should be the directory that
|
||||
contains the ``index.html`` for the documentation.
|
||||
:return: The HTTP response received from PyPI upon submission of the
|
||||
request.
|
||||
"""
|
||||
self.check_credentials()
|
||||
if not os.path.isdir(doc_dir):
|
||||
raise DistlibException('not a directory: %r' % doc_dir)
|
||||
fn = os.path.join(doc_dir, 'index.html')
|
||||
if not os.path.exists(fn):
|
||||
raise DistlibException('not found: %r' % fn)
|
||||
metadata.validate()
|
||||
name, version = metadata.name, metadata.version
|
||||
zip_data = zip_dir(doc_dir).getvalue()
|
||||
fields = [(':action', 'doc_upload'),
|
||||
('name', name), ('version', version)]
|
||||
files = [('content', name, zip_data)]
|
||||
request = self.encode_request(fields, files)
|
||||
return self.send_request(request)
|
||||
|
||||
def get_verify_command(self, signature_filename, data_filename,
|
||||
keystore=None):
|
||||
"""
|
||||
Return a suitable command for verifying a file.
|
||||
|
||||
:param signature_filename: The pathname to the file containing the
|
||||
signature.
|
||||
:param data_filename: The pathname to the file containing the
|
||||
signed data.
|
||||
:param keystore: The path to a directory which contains the keys
|
||||
used in verification. If not specified, the
|
||||
instance's ``gpg_home`` attribute is used instead.
|
||||
:return: The verifying command as a list suitable to be
|
||||
passed to :class:`subprocess.Popen`.
|
||||
"""
|
||||
cmd = [self.gpg, '--status-fd', '2', '--no-tty']
|
||||
if keystore is None:
|
||||
keystore = self.gpg_home
|
||||
if keystore:
|
||||
cmd.extend(['--homedir', keystore])
|
||||
cmd.extend(['--verify', signature_filename, data_filename])
|
||||
logger.debug('invoking: %s', ' '.join(cmd))
|
||||
return cmd
|
||||
|
||||
def verify_signature(self, signature_filename, data_filename,
|
||||
keystore=None):
|
||||
"""
|
||||
Verify a signature for a file.
|
||||
|
||||
:param signature_filename: The pathname to the file containing the
|
||||
signature.
|
||||
:param data_filename: The pathname to the file containing the
|
||||
signed data.
|
||||
:param keystore: The path to a directory which contains the keys
|
||||
used in verification. If not specified, the
|
||||
instance's ``gpg_home`` attribute is used instead.
|
||||
:return: True if the signature was verified, else False.
|
||||
"""
|
||||
if not self.gpg:
|
||||
raise DistlibException('verification unavailable because gpg '
|
||||
'unavailable')
|
||||
cmd = self.get_verify_command(signature_filename, data_filename,
|
||||
keystore)
|
||||
rc, stdout, stderr = self.run_command(cmd)
|
||||
if rc not in (0, 1):
|
||||
raise DistlibException('verify command failed with error '
|
||||
'code %s' % rc)
|
||||
return rc == 0
|
||||
|
||||
def download_file(self, url, destfile, digest=None, reporthook=None):
|
||||
"""
|
||||
This is a convenience method for downloading a file from an URL.
|
||||
Normally, this will be a file from the index, though currently
|
||||
no check is made for this (i.e. a file can be downloaded from
|
||||
anywhere).
|
||||
|
||||
The method is just like the :func:`urlretrieve` function in the
|
||||
standard library, except that it allows digest computation to be
|
||||
done during download and checking that the downloaded data
|
||||
matched any expected value.
|
||||
|
||||
:param url: The URL of the file to be downloaded (assumed to be
|
||||
available via an HTTP GET request).
|
||||
:param destfile: The pathname where the downloaded file is to be
|
||||
saved.
|
||||
:param digest: If specified, this must be a (hasher, value)
|
||||
tuple, where hasher is the algorithm used (e.g.
|
||||
``'md5'``) and ``value`` is the expected value.
|
||||
:param reporthook: The same as for :func:`urlretrieve` in the
|
||||
standard library.
|
||||
"""
|
||||
if digest is None:
|
||||
digester = None
|
||||
logger.debug('No digest specified')
|
||||
else:
|
||||
if isinstance(digest, (list, tuple)):
|
||||
hasher, digest = digest
|
||||
else:
|
||||
hasher = 'md5'
|
||||
digester = getattr(hashlib, hasher)()
|
||||
logger.debug('Digest specified: %s' % digest)
|
||||
# The following code is equivalent to urlretrieve.
|
||||
# We need to do it this way so that we can compute the
|
||||
# digest of the file as we go.
|
||||
with open(destfile, 'wb') as dfp:
|
||||
# addinfourl is not a context manager on 2.x
|
||||
# so we have to use try/finally
|
||||
sfp = self.send_request(Request(url))
|
||||
try:
|
||||
headers = sfp.info()
|
||||
blocksize = 8192
|
||||
size = -1
|
||||
read = 0
|
||||
blocknum = 0
|
||||
if "content-length" in headers:
|
||||
size = int(headers["Content-Length"])
|
||||
if reporthook:
|
||||
reporthook(blocknum, blocksize, size)
|
||||
while True:
|
||||
block = sfp.read(blocksize)
|
||||
if not block:
|
||||
break
|
||||
read += len(block)
|
||||
dfp.write(block)
|
||||
if digester:
|
||||
digester.update(block)
|
||||
blocknum += 1
|
||||
if reporthook:
|
||||
reporthook(blocknum, blocksize, size)
|
||||
finally:
|
||||
sfp.close()
|
||||
|
||||
# check that we got the whole file, if we can
|
||||
if size >= 0 and read < size:
|
||||
raise DistlibException(
|
||||
'retrieval incomplete: got only %d out of %d bytes'
|
||||
% (read, size))
|
||||
# if we have a digest, it must match.
|
||||
if digester:
|
||||
actual = digester.hexdigest()
|
||||
if digest != actual:
|
||||
raise DistlibException('%s digest mismatch for %s: expected '
|
||||
'%s, got %s' % (hasher, destfile,
|
||||
digest, actual))
|
||||
logger.debug('Digest verified: %s', digest)
|
||||
|
||||
def send_request(self, req):
|
||||
"""
|
||||
Send a standard library :class:`Request` to PyPI and return its
|
||||
response.
|
||||
|
||||
:param req: The request to send.
|
||||
:return: The HTTP response from PyPI (a standard library HTTPResponse).
|
||||
"""
|
||||
handlers = []
|
||||
if self.password_handler:
|
||||
handlers.append(self.password_handler)
|
||||
if self.ssl_verifier:
|
||||
handlers.append(self.ssl_verifier)
|
||||
opener = build_opener(*handlers)
|
||||
return opener.open(req)
|
||||
|
||||
def encode_request(self, fields, files):
|
||||
"""
|
||||
Encode fields and files for posting to an HTTP server.
|
||||
|
||||
:param fields: The fields to send as a list of (fieldname, value)
|
||||
tuples.
|
||||
:param files: The files to send as a list of (fieldname, filename,
|
||||
file_bytes) tuple.
|
||||
"""
|
||||
# Adapted from packaging, which in turn was adapted from
|
||||
# http://code.activestate.com/recipes/146306
|
||||
|
||||
parts = []
|
||||
boundary = self.boundary
|
||||
for k, values in fields:
|
||||
if not isinstance(values, (list, tuple)):
|
||||
values = [values]
|
||||
|
||||
for v in values:
|
||||
parts.extend((
|
||||
b'--' + boundary,
|
||||
('Content-Disposition: form-data; name="%s"' %
|
||||
k).encode('utf-8'),
|
||||
b'',
|
||||
v.encode('utf-8')))
|
||||
for key, filename, value in files:
|
||||
parts.extend((
|
||||
b'--' + boundary,
|
||||
('Content-Disposition: form-data; name="%s"; filename="%s"' %
|
||||
(key, filename)).encode('utf-8'),
|
||||
b'',
|
||||
value))
|
||||
|
||||
parts.extend((b'--' + boundary + b'--', b''))
|
||||
|
||||
body = b'\r\n'.join(parts)
|
||||
ct = b'multipart/form-data; boundary=' + boundary
|
||||
headers = {
|
||||
'Content-type': ct,
|
||||
'Content-length': str(len(body))
|
||||
}
|
||||
return Request(self.url, body, headers)
|
||||
|
||||
def search(self, terms, operator=None):
|
||||
if isinstance(terms, string_types):
|
||||
terms = {'name': terms}
|
||||
rpc_proxy = ServerProxy(self.url, timeout=3.0)
|
||||
try:
|
||||
return rpc_proxy.search(terms, operator or 'and')
|
||||
finally:
|
||||
rpc_proxy('close')()
|
||||
1300
venv/Lib/site-packages/pip/_vendor/distlib/locators.py
Normal file
1300
venv/Lib/site-packages/pip/_vendor/distlib/locators.py
Normal file
File diff suppressed because it is too large
Load Diff
393
venv/Lib/site-packages/pip/_vendor/distlib/manifest.py
Normal file
393
venv/Lib/site-packages/pip/_vendor/distlib/manifest.py
Normal file
@@ -0,0 +1,393 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2012-2013 Python Software Foundation.
|
||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
||||
#
|
||||
"""
|
||||
Class representing the list of files in a distribution.
|
||||
|
||||
Equivalent to distutils.filelist, but fixes some problems.
|
||||
"""
|
||||
import fnmatch
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from . import DistlibException
|
||||
from .compat import fsdecode
|
||||
from .util import convert_path
|
||||
|
||||
|
||||
__all__ = ['Manifest']
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# a \ followed by some spaces + EOL
|
||||
_COLLAPSE_PATTERN = re.compile('\\\\w*\n', re.M)
|
||||
_COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S)
|
||||
|
||||
#
|
||||
# Due to the different results returned by fnmatch.translate, we need
|
||||
# to do slightly different processing for Python 2.7 and 3.2 ... this needed
|
||||
# to be brought in for Python 3.6 onwards.
|
||||
#
|
||||
_PYTHON_VERSION = sys.version_info[:2]
|
||||
|
||||
class Manifest(object):
|
||||
"""A list of files built by on exploring the filesystem and filtered by
|
||||
applying various patterns to what we find there.
|
||||
"""
|
||||
|
||||
def __init__(self, base=None):
|
||||
"""
|
||||
Initialise an instance.
|
||||
|
||||
:param base: The base directory to explore under.
|
||||
"""
|
||||
self.base = os.path.abspath(os.path.normpath(base or os.getcwd()))
|
||||
self.prefix = self.base + os.sep
|
||||
self.allfiles = None
|
||||
self.files = set()
|
||||
|
||||
#
|
||||
# Public API
|
||||
#
|
||||
|
||||
def findall(self):
|
||||
"""Find all files under the base and set ``allfiles`` to the absolute
|
||||
pathnames of files found.
|
||||
"""
|
||||
from stat import S_ISREG, S_ISDIR, S_ISLNK
|
||||
|
||||
self.allfiles = allfiles = []
|
||||
root = self.base
|
||||
stack = [root]
|
||||
pop = stack.pop
|
||||
push = stack.append
|
||||
|
||||
while stack:
|
||||
root = pop()
|
||||
names = os.listdir(root)
|
||||
|
||||
for name in names:
|
||||
fullname = os.path.join(root, name)
|
||||
|
||||
# Avoid excess stat calls -- just one will do, thank you!
|
||||
stat = os.stat(fullname)
|
||||
mode = stat.st_mode
|
||||
if S_ISREG(mode):
|
||||
allfiles.append(fsdecode(fullname))
|
||||
elif S_ISDIR(mode) and not S_ISLNK(mode):
|
||||
push(fullname)
|
||||
|
||||
def add(self, item):
|
||||
"""
|
||||
Add a file to the manifest.
|
||||
|
||||
:param item: The pathname to add. This can be relative to the base.
|
||||
"""
|
||||
if not item.startswith(self.prefix):
|
||||
item = os.path.join(self.base, item)
|
||||
self.files.add(os.path.normpath(item))
|
||||
|
||||
def add_many(self, items):
|
||||
"""
|
||||
Add a list of files to the manifest.
|
||||
|
||||
:param items: The pathnames to add. These can be relative to the base.
|
||||
"""
|
||||
for item in items:
|
||||
self.add(item)
|
||||
|
||||
def sorted(self, wantdirs=False):
|
||||
"""
|
||||
Return sorted files in directory order
|
||||
"""
|
||||
|
||||
def add_dir(dirs, d):
|
||||
dirs.add(d)
|
||||
logger.debug('add_dir added %s', d)
|
||||
if d != self.base:
|
||||
parent, _ = os.path.split(d)
|
||||
assert parent not in ('', '/')
|
||||
add_dir(dirs, parent)
|
||||
|
||||
result = set(self.files) # make a copy!
|
||||
if wantdirs:
|
||||
dirs = set()
|
||||
for f in result:
|
||||
add_dir(dirs, os.path.dirname(f))
|
||||
result |= dirs
|
||||
return [os.path.join(*path_tuple) for path_tuple in
|
||||
sorted(os.path.split(path) for path in result)]
|
||||
|
||||
def clear(self):
|
||||
"""Clear all collected files."""
|
||||
self.files = set()
|
||||
self.allfiles = []
|
||||
|
||||
def process_directive(self, directive):
|
||||
"""
|
||||
Process a directive which either adds some files from ``allfiles`` to
|
||||
``files``, or removes some files from ``files``.
|
||||
|
||||
:param directive: The directive to process. This should be in a format
|
||||
compatible with distutils ``MANIFEST.in`` files:
|
||||
|
||||
http://docs.python.org/distutils/sourcedist.html#commands
|
||||
"""
|
||||
# Parse the line: split it up, make sure the right number of words
|
||||
# is there, and return the relevant words. 'action' is always
|
||||
# defined: it's the first word of the line. Which of the other
|
||||
# three are defined depends on the action; it'll be either
|
||||
# patterns, (dir and patterns), or (dirpattern).
|
||||
action, patterns, thedir, dirpattern = self._parse_directive(directive)
|
||||
|
||||
# OK, now we know that the action is valid and we have the
|
||||
# right number of words on the line for that action -- so we
|
||||
# can proceed with minimal error-checking.
|
||||
if action == 'include':
|
||||
for pattern in patterns:
|
||||
if not self._include_pattern(pattern, anchor=True):
|
||||
logger.warning('no files found matching %r', pattern)
|
||||
|
||||
elif action == 'exclude':
|
||||
for pattern in patterns:
|
||||
found = self._exclude_pattern(pattern, anchor=True)
|
||||
#if not found:
|
||||
# logger.warning('no previously-included files '
|
||||
# 'found matching %r', pattern)
|
||||
|
||||
elif action == 'global-include':
|
||||
for pattern in patterns:
|
||||
if not self._include_pattern(pattern, anchor=False):
|
||||
logger.warning('no files found matching %r '
|
||||
'anywhere in distribution', pattern)
|
||||
|
||||
elif action == 'global-exclude':
|
||||
for pattern in patterns:
|
||||
found = self._exclude_pattern(pattern, anchor=False)
|
||||
#if not found:
|
||||
# logger.warning('no previously-included files '
|
||||
# 'matching %r found anywhere in '
|
||||
# 'distribution', pattern)
|
||||
|
||||
elif action == 'recursive-include':
|
||||
for pattern in patterns:
|
||||
if not self._include_pattern(pattern, prefix=thedir):
|
||||
logger.warning('no files found matching %r '
|
||||
'under directory %r', pattern, thedir)
|
||||
|
||||
elif action == 'recursive-exclude':
|
||||
for pattern in patterns:
|
||||
found = self._exclude_pattern(pattern, prefix=thedir)
|
||||
#if not found:
|
||||
# logger.warning('no previously-included files '
|
||||
# 'matching %r found under directory %r',
|
||||
# pattern, thedir)
|
||||
|
||||
elif action == 'graft':
|
||||
if not self._include_pattern(None, prefix=dirpattern):
|
||||
logger.warning('no directories found matching %r',
|
||||
dirpattern)
|
||||
|
||||
elif action == 'prune':
|
||||
if not self._exclude_pattern(None, prefix=dirpattern):
|
||||
logger.warning('no previously-included directories found '
|
||||
'matching %r', dirpattern)
|
||||
else: # pragma: no cover
|
||||
# This should never happen, as it should be caught in
|
||||
# _parse_template_line
|
||||
raise DistlibException(
|
||||
'invalid action %r' % action)
|
||||
|
||||
#
|
||||
# Private API
|
||||
#
|
||||
|
||||
def _parse_directive(self, directive):
|
||||
"""
|
||||
Validate a directive.
|
||||
:param directive: The directive to validate.
|
||||
:return: A tuple of action, patterns, thedir, dir_patterns
|
||||
"""
|
||||
words = directive.split()
|
||||
if len(words) == 1 and words[0] not in ('include', 'exclude',
|
||||
'global-include',
|
||||
'global-exclude',
|
||||
'recursive-include',
|
||||
'recursive-exclude',
|
||||
'graft', 'prune'):
|
||||
# no action given, let's use the default 'include'
|
||||
words.insert(0, 'include')
|
||||
|
||||
action = words[0]
|
||||
patterns = thedir = dir_pattern = None
|
||||
|
||||
if action in ('include', 'exclude',
|
||||
'global-include', 'global-exclude'):
|
||||
if len(words) < 2:
|
||||
raise DistlibException(
|
||||
'%r expects <pattern1> <pattern2> ...' % action)
|
||||
|
||||
patterns = [convert_path(word) for word in words[1:]]
|
||||
|
||||
elif action in ('recursive-include', 'recursive-exclude'):
|
||||
if len(words) < 3:
|
||||
raise DistlibException(
|
||||
'%r expects <dir> <pattern1> <pattern2> ...' % action)
|
||||
|
||||
thedir = convert_path(words[1])
|
||||
patterns = [convert_path(word) for word in words[2:]]
|
||||
|
||||
elif action in ('graft', 'prune'):
|
||||
if len(words) != 2:
|
||||
raise DistlibException(
|
||||
'%r expects a single <dir_pattern>' % action)
|
||||
|
||||
dir_pattern = convert_path(words[1])
|
||||
|
||||
else:
|
||||
raise DistlibException('unknown action %r' % action)
|
||||
|
||||
return action, patterns, thedir, dir_pattern
|
||||
|
||||
def _include_pattern(self, pattern, anchor=True, prefix=None,
|
||||
is_regex=False):
|
||||
"""Select strings (presumably filenames) from 'self.files' that
|
||||
match 'pattern', a Unix-style wildcard (glob) pattern.
|
||||
|
||||
Patterns are not quite the same as implemented by the 'fnmatch'
|
||||
module: '*' and '?' match non-special characters, where "special"
|
||||
is platform-dependent: slash on Unix; colon, slash, and backslash on
|
||||
DOS/Windows; and colon on Mac OS.
|
||||
|
||||
If 'anchor' is true (the default), then the pattern match is more
|
||||
stringent: "*.py" will match "foo.py" but not "foo/bar.py". If
|
||||
'anchor' is false, both of these will match.
|
||||
|
||||
If 'prefix' is supplied, then only filenames starting with 'prefix'
|
||||
(itself a pattern) and ending with 'pattern', with anything in between
|
||||
them, will match. 'anchor' is ignored in this case.
|
||||
|
||||
If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
|
||||
'pattern' is assumed to be either a string containing a regex or a
|
||||
regex object -- no translation is done, the regex is just compiled
|
||||
and used as-is.
|
||||
|
||||
Selected strings will be added to self.files.
|
||||
|
||||
Return True if files are found.
|
||||
"""
|
||||
# XXX docstring lying about what the special chars are?
|
||||
found = False
|
||||
pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)
|
||||
|
||||
# delayed loading of allfiles list
|
||||
if self.allfiles is None:
|
||||
self.findall()
|
||||
|
||||
for name in self.allfiles:
|
||||
if pattern_re.search(name):
|
||||
self.files.add(name)
|
||||
found = True
|
||||
return found
|
||||
|
||||
def _exclude_pattern(self, pattern, anchor=True, prefix=None,
|
||||
is_regex=False):
|
||||
"""Remove strings (presumably filenames) from 'files' that match
|
||||
'pattern'.
|
||||
|
||||
Other parameters are the same as for 'include_pattern()', above.
|
||||
The list 'self.files' is modified in place. Return True if files are
|
||||
found.
|
||||
|
||||
This API is public to allow e.g. exclusion of SCM subdirs, e.g. when
|
||||
packaging source distributions
|
||||
"""
|
||||
found = False
|
||||
pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)
|
||||
for f in list(self.files):
|
||||
if pattern_re.search(f):
|
||||
self.files.remove(f)
|
||||
found = True
|
||||
return found
|
||||
|
||||
def _translate_pattern(self, pattern, anchor=True, prefix=None,
|
||||
is_regex=False):
|
||||
"""Translate a shell-like wildcard pattern to a compiled regular
|
||||
expression.
|
||||
|
||||
Return the compiled regex. If 'is_regex' true,
|
||||
then 'pattern' is directly compiled to a regex (if it's a string)
|
||||
or just returned as-is (assumes it's a regex object).
|
||||
"""
|
||||
if is_regex:
|
||||
if isinstance(pattern, str):
|
||||
return re.compile(pattern)
|
||||
else:
|
||||
return pattern
|
||||
|
||||
if _PYTHON_VERSION > (3, 2):
|
||||
# ditch start and end characters
|
||||
start, _, end = self._glob_to_re('_').partition('_')
|
||||
|
||||
if pattern:
|
||||
pattern_re = self._glob_to_re(pattern)
|
||||
if _PYTHON_VERSION > (3, 2):
|
||||
assert pattern_re.startswith(start) and pattern_re.endswith(end)
|
||||
else:
|
||||
pattern_re = ''
|
||||
|
||||
base = re.escape(os.path.join(self.base, ''))
|
||||
if prefix is not None:
|
||||
# ditch end of pattern character
|
||||
if _PYTHON_VERSION <= (3, 2):
|
||||
empty_pattern = self._glob_to_re('')
|
||||
prefix_re = self._glob_to_re(prefix)[:-len(empty_pattern)]
|
||||
else:
|
||||
prefix_re = self._glob_to_re(prefix)
|
||||
assert prefix_re.startswith(start) and prefix_re.endswith(end)
|
||||
prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
|
||||
sep = os.sep
|
||||
if os.sep == '\\':
|
||||
sep = r'\\'
|
||||
if _PYTHON_VERSION <= (3, 2):
|
||||
pattern_re = '^' + base + sep.join((prefix_re,
|
||||
'.*' + pattern_re))
|
||||
else:
|
||||
pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
|
||||
pattern_re = r'%s%s%s%s.*%s%s' % (start, base, prefix_re, sep,
|
||||
pattern_re, end)
|
||||
else: # no prefix -- respect anchor flag
|
||||
if anchor:
|
||||
if _PYTHON_VERSION <= (3, 2):
|
||||
pattern_re = '^' + base + pattern_re
|
||||
else:
|
||||
pattern_re = r'%s%s%s' % (start, base, pattern_re[len(start):])
|
||||
|
||||
return re.compile(pattern_re)
|
||||
|
||||
def _glob_to_re(self, pattern):
|
||||
"""Translate a shell-like glob pattern to a regular expression.
|
||||
|
||||
Return a string containing the regex. Differs from
|
||||
'fnmatch.translate()' in that '*' does not match "special characters"
|
||||
(which are platform-specific).
|
||||
"""
|
||||
pattern_re = fnmatch.translate(pattern)
|
||||
|
||||
# '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
|
||||
# IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
|
||||
# and by extension they shouldn't match such "special characters" under
|
||||
# any OS. So change all non-escaped dots in the RE to match any
|
||||
# character except the special characters (currently: just os.sep).
|
||||
sep = os.sep
|
||||
if os.sep == '\\':
|
||||
# we're using a regex to manipulate a regex, so we need
|
||||
# to escape the backslash twice
|
||||
sep = r'\\\\'
|
||||
escaped = r'\1[^%s]' % sep
|
||||
pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
|
||||
return pattern_re
|
||||
130
venv/Lib/site-packages/pip/_vendor/distlib/markers.py
Normal file
130
venv/Lib/site-packages/pip/_vendor/distlib/markers.py
Normal file
@@ -0,0 +1,130 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2012-2017 Vinay Sajip.
|
||||
# Licensed to the Python Software Foundation under a contributor agreement.
|
||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
||||
#
|
||||
"""
|
||||
Parser for the environment markers micro-language defined in PEP 508.
|
||||
"""
|
||||
|
||||
# Note: In PEP 345, the micro-language was Python compatible, so the ast
|
||||
# module could be used to parse it. However, PEP 508 introduced operators such
|
||||
# as ~= and === which aren't in Python, necessitating a different approach.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import platform
|
||||
|
||||
from .compat import string_types
|
||||
from .util import in_venv, parse_marker
|
||||
|
||||
__all__ = ['interpret']
|
||||
|
||||
def _is_literal(o):
|
||||
if not isinstance(o, string_types) or not o:
|
||||
return False
|
||||
return o[0] in '\'"'
|
||||
|
||||
class Evaluator(object):
|
||||
"""
|
||||
This class is used to evaluate marker expessions.
|
||||
"""
|
||||
|
||||
operations = {
|
||||
'==': lambda x, y: x == y,
|
||||
'===': lambda x, y: x == y,
|
||||
'~=': lambda x, y: x == y or x > y,
|
||||
'!=': lambda x, y: x != y,
|
||||
'<': lambda x, y: x < y,
|
||||
'<=': lambda x, y: x == y or x < y,
|
||||
'>': lambda x, y: x > y,
|
||||
'>=': lambda x, y: x == y or x > y,
|
||||
'and': lambda x, y: x and y,
|
||||
'or': lambda x, y: x or y,
|
||||
'in': lambda x, y: x in y,
|
||||
'not in': lambda x, y: x not in y,
|
||||
}
|
||||
|
||||
def evaluate(self, expr, context):
|
||||
"""
|
||||
Evaluate a marker expression returned by the :func:`parse_requirement`
|
||||
function in the specified context.
|
||||
"""
|
||||
if isinstance(expr, string_types):
|
||||
if expr[0] in '\'"':
|
||||
result = expr[1:-1]
|
||||
else:
|
||||
if expr not in context:
|
||||
raise SyntaxError('unknown variable: %s' % expr)
|
||||
result = context[expr]
|
||||
else:
|
||||
assert isinstance(expr, dict)
|
||||
op = expr['op']
|
||||
if op not in self.operations:
|
||||
raise NotImplementedError('op not implemented: %s' % op)
|
||||
elhs = expr['lhs']
|
||||
erhs = expr['rhs']
|
||||
if _is_literal(expr['lhs']) and _is_literal(expr['rhs']):
|
||||
raise SyntaxError('invalid comparison: %s %s %s' % (elhs, op, erhs))
|
||||
|
||||
lhs = self.evaluate(elhs, context)
|
||||
rhs = self.evaluate(erhs, context)
|
||||
result = self.operations[op](lhs, rhs)
|
||||
return result
|
||||
|
||||
def default_context():
|
||||
def format_full_version(info):
|
||||
version = '%s.%s.%s' % (info.major, info.minor, info.micro)
|
||||
kind = info.releaselevel
|
||||
if kind != 'final':
|
||||
version += kind[0] + str(info.serial)
|
||||
return version
|
||||
|
||||
if hasattr(sys, 'implementation'):
|
||||
implementation_version = format_full_version(sys.implementation.version)
|
||||
implementation_name = sys.implementation.name
|
||||
else:
|
||||
implementation_version = '0'
|
||||
implementation_name = ''
|
||||
|
||||
result = {
|
||||
'implementation_name': implementation_name,
|
||||
'implementation_version': implementation_version,
|
||||
'os_name': os.name,
|
||||
'platform_machine': platform.machine(),
|
||||
'platform_python_implementation': platform.python_implementation(),
|
||||
'platform_release': platform.release(),
|
||||
'platform_system': platform.system(),
|
||||
'platform_version': platform.version(),
|
||||
'platform_in_venv': str(in_venv()),
|
||||
'python_full_version': platform.python_version(),
|
||||
'python_version': platform.python_version()[:3],
|
||||
'sys_platform': sys.platform,
|
||||
}
|
||||
return result
|
||||
|
||||
DEFAULT_CONTEXT = default_context()
|
||||
del default_context
|
||||
|
||||
evaluator = Evaluator()
|
||||
|
||||
def interpret(marker, execution_context=None):
|
||||
"""
|
||||
Interpret a marker and return a result depending on environment.
|
||||
|
||||
:param marker: The marker to interpret.
|
||||
:type marker: str
|
||||
:param execution_context: The context used for name lookup.
|
||||
:type execution_context: mapping
|
||||
"""
|
||||
try:
|
||||
expr, rest = parse_marker(marker)
|
||||
except Exception as e:
|
||||
raise SyntaxError('Unable to interpret marker syntax: %s: %s' % (marker, e))
|
||||
if rest and rest[0] != '#':
|
||||
raise SyntaxError('unexpected trailing data in marker: %s: %s' % (marker, rest))
|
||||
context = dict(DEFAULT_CONTEXT)
|
||||
if execution_context:
|
||||
context.update(execution_context)
|
||||
return evaluator.evaluate(expr, context)
|
||||
1058
venv/Lib/site-packages/pip/_vendor/distlib/metadata.py
Normal file
1058
venv/Lib/site-packages/pip/_vendor/distlib/metadata.py
Normal file
File diff suppressed because it is too large
Load Diff
358
venv/Lib/site-packages/pip/_vendor/distlib/resources.py
Normal file
358
venv/Lib/site-packages/pip/_vendor/distlib/resources.py
Normal file
@@ -0,0 +1,358 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2013-2017 Vinay Sajip.
|
||||
# Licensed to the Python Software Foundation under a contributor agreement.
|
||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
||||
#
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import bisect
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import pkgutil
|
||||
import sys
|
||||
import types
|
||||
import zipimport
|
||||
|
||||
from . import DistlibException
|
||||
from .util import cached_property, get_cache_base, Cache
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
cache = None # created when needed
|
||||
|
||||
|
||||
class ResourceCache(Cache):
|
||||
def __init__(self, base=None):
|
||||
if base is None:
|
||||
# Use native string to avoid issues on 2.x: see Python #20140.
|
||||
base = os.path.join(get_cache_base(), str('resource-cache'))
|
||||
super(ResourceCache, self).__init__(base)
|
||||
|
||||
def is_stale(self, resource, path):
|
||||
"""
|
||||
Is the cache stale for the given resource?
|
||||
|
||||
:param resource: The :class:`Resource` being cached.
|
||||
:param path: The path of the resource in the cache.
|
||||
:return: True if the cache is stale.
|
||||
"""
|
||||
# Cache invalidation is a hard problem :-)
|
||||
return True
|
||||
|
||||
def get(self, resource):
|
||||
"""
|
||||
Get a resource into the cache,
|
||||
|
||||
:param resource: A :class:`Resource` instance.
|
||||
:return: The pathname of the resource in the cache.
|
||||
"""
|
||||
prefix, path = resource.finder.get_cache_info(resource)
|
||||
if prefix is None:
|
||||
result = path
|
||||
else:
|
||||
result = os.path.join(self.base, self.prefix_to_dir(prefix), path)
|
||||
dirname = os.path.dirname(result)
|
||||
if not os.path.isdir(dirname):
|
||||
os.makedirs(dirname)
|
||||
if not os.path.exists(result):
|
||||
stale = True
|
||||
else:
|
||||
stale = self.is_stale(resource, path)
|
||||
if stale:
|
||||
# write the bytes of the resource to the cache location
|
||||
with open(result, 'wb') as f:
|
||||
f.write(resource.bytes)
|
||||
return result
|
||||
|
||||
|
||||
class ResourceBase(object):
|
||||
def __init__(self, finder, name):
|
||||
self.finder = finder
|
||||
self.name = name
|
||||
|
||||
|
||||
class Resource(ResourceBase):
|
||||
"""
|
||||
A class representing an in-package resource, such as a data file. This is
|
||||
not normally instantiated by user code, but rather by a
|
||||
:class:`ResourceFinder` which manages the resource.
|
||||
"""
|
||||
is_container = False # Backwards compatibility
|
||||
|
||||
def as_stream(self):
|
||||
"""
|
||||
Get the resource as a stream.
|
||||
|
||||
This is not a property to make it obvious that it returns a new stream
|
||||
each time.
|
||||
"""
|
||||
return self.finder.get_stream(self)
|
||||
|
||||
@cached_property
|
||||
def file_path(self):
|
||||
global cache
|
||||
if cache is None:
|
||||
cache = ResourceCache()
|
||||
return cache.get(self)
|
||||
|
||||
@cached_property
|
||||
def bytes(self):
|
||||
return self.finder.get_bytes(self)
|
||||
|
||||
@cached_property
|
||||
def size(self):
|
||||
return self.finder.get_size(self)
|
||||
|
||||
|
||||
class ResourceContainer(ResourceBase):
|
||||
is_container = True # Backwards compatibility
|
||||
|
||||
@cached_property
|
||||
def resources(self):
|
||||
return self.finder.get_resources(self)
|
||||
|
||||
|
||||
class ResourceFinder(object):
|
||||
"""
|
||||
Resource finder for file system resources.
|
||||
"""
|
||||
|
||||
if sys.platform.startswith('java'):
|
||||
skipped_extensions = ('.pyc', '.pyo', '.class')
|
||||
else:
|
||||
skipped_extensions = ('.pyc', '.pyo')
|
||||
|
||||
def __init__(self, module):
|
||||
self.module = module
|
||||
self.loader = getattr(module, '__loader__', None)
|
||||
self.base = os.path.dirname(getattr(module, '__file__', ''))
|
||||
|
||||
def _adjust_path(self, path):
|
||||
return os.path.realpath(path)
|
||||
|
||||
def _make_path(self, resource_name):
|
||||
# Issue #50: need to preserve type of path on Python 2.x
|
||||
# like os.path._get_sep
|
||||
if isinstance(resource_name, bytes): # should only happen on 2.x
|
||||
sep = b'/'
|
||||
else:
|
||||
sep = '/'
|
||||
parts = resource_name.split(sep)
|
||||
parts.insert(0, self.base)
|
||||
result = os.path.join(*parts)
|
||||
return self._adjust_path(result)
|
||||
|
||||
def _find(self, path):
|
||||
return os.path.exists(path)
|
||||
|
||||
def get_cache_info(self, resource):
|
||||
return None, resource.path
|
||||
|
||||
def find(self, resource_name):
|
||||
path = self._make_path(resource_name)
|
||||
if not self._find(path):
|
||||
result = None
|
||||
else:
|
||||
if self._is_directory(path):
|
||||
result = ResourceContainer(self, resource_name)
|
||||
else:
|
||||
result = Resource(self, resource_name)
|
||||
result.path = path
|
||||
return result
|
||||
|
||||
def get_stream(self, resource):
|
||||
return open(resource.path, 'rb')
|
||||
|
||||
def get_bytes(self, resource):
|
||||
with open(resource.path, 'rb') as f:
|
||||
return f.read()
|
||||
|
||||
def get_size(self, resource):
|
||||
return os.path.getsize(resource.path)
|
||||
|
||||
def get_resources(self, resource):
|
||||
def allowed(f):
|
||||
return (f != '__pycache__' and not
|
||||
f.endswith(self.skipped_extensions))
|
||||
return set([f for f in os.listdir(resource.path) if allowed(f)])
|
||||
|
||||
def is_container(self, resource):
|
||||
return self._is_directory(resource.path)
|
||||
|
||||
_is_directory = staticmethod(os.path.isdir)
|
||||
|
||||
def iterator(self, resource_name):
|
||||
resource = self.find(resource_name)
|
||||
if resource is not None:
|
||||
todo = [resource]
|
||||
while todo:
|
||||
resource = todo.pop(0)
|
||||
yield resource
|
||||
if resource.is_container:
|
||||
rname = resource.name
|
||||
for name in resource.resources:
|
||||
if not rname:
|
||||
new_name = name
|
||||
else:
|
||||
new_name = '/'.join([rname, name])
|
||||
child = self.find(new_name)
|
||||
if child.is_container:
|
||||
todo.append(child)
|
||||
else:
|
||||
yield child
|
||||
|
||||
|
||||
class ZipResourceFinder(ResourceFinder):
|
||||
"""
|
||||
Resource finder for resources in .zip files.
|
||||
"""
|
||||
def __init__(self, module):
|
||||
super(ZipResourceFinder, self).__init__(module)
|
||||
archive = self.loader.archive
|
||||
self.prefix_len = 1 + len(archive)
|
||||
# PyPy doesn't have a _files attr on zipimporter, and you can't set one
|
||||
if hasattr(self.loader, '_files'):
|
||||
self._files = self.loader._files
|
||||
else:
|
||||
self._files = zipimport._zip_directory_cache[archive]
|
||||
self.index = sorted(self._files)
|
||||
|
||||
def _adjust_path(self, path):
|
||||
return path
|
||||
|
||||
def _find(self, path):
|
||||
path = path[self.prefix_len:]
|
||||
if path in self._files:
|
||||
result = True
|
||||
else:
|
||||
if path and path[-1] != os.sep:
|
||||
path = path + os.sep
|
||||
i = bisect.bisect(self.index, path)
|
||||
try:
|
||||
result = self.index[i].startswith(path)
|
||||
except IndexError:
|
||||
result = False
|
||||
if not result:
|
||||
logger.debug('_find failed: %r %r', path, self.loader.prefix)
|
||||
else:
|
||||
logger.debug('_find worked: %r %r', path, self.loader.prefix)
|
||||
return result
|
||||
|
||||
def get_cache_info(self, resource):
|
||||
prefix = self.loader.archive
|
||||
path = resource.path[1 + len(prefix):]
|
||||
return prefix, path
|
||||
|
||||
def get_bytes(self, resource):
|
||||
return self.loader.get_data(resource.path)
|
||||
|
||||
def get_stream(self, resource):
|
||||
return io.BytesIO(self.get_bytes(resource))
|
||||
|
||||
def get_size(self, resource):
|
||||
path = resource.path[self.prefix_len:]
|
||||
return self._files[path][3]
|
||||
|
||||
def get_resources(self, resource):
|
||||
path = resource.path[self.prefix_len:]
|
||||
if path and path[-1] != os.sep:
|
||||
path += os.sep
|
||||
plen = len(path)
|
||||
result = set()
|
||||
i = bisect.bisect(self.index, path)
|
||||
while i < len(self.index):
|
||||
if not self.index[i].startswith(path):
|
||||
break
|
||||
s = self.index[i][plen:]
|
||||
result.add(s.split(os.sep, 1)[0]) # only immediate children
|
||||
i += 1
|
||||
return result
|
||||
|
||||
def _is_directory(self, path):
|
||||
path = path[self.prefix_len:]
|
||||
if path and path[-1] != os.sep:
|
||||
path += os.sep
|
||||
i = bisect.bisect(self.index, path)
|
||||
try:
|
||||
result = self.index[i].startswith(path)
|
||||
except IndexError:
|
||||
result = False
|
||||
return result
|
||||
|
||||
|
||||
_finder_registry = {
|
||||
type(None): ResourceFinder,
|
||||
zipimport.zipimporter: ZipResourceFinder
|
||||
}
|
||||
|
||||
try:
|
||||
# In Python 3.6, _frozen_importlib -> _frozen_importlib_external
|
||||
try:
|
||||
import _frozen_importlib_external as _fi
|
||||
except ImportError:
|
||||
import _frozen_importlib as _fi
|
||||
_finder_registry[_fi.SourceFileLoader] = ResourceFinder
|
||||
_finder_registry[_fi.FileFinder] = ResourceFinder
|
||||
# See issue #146
|
||||
_finder_registry[_fi.SourcelessFileLoader] = ResourceFinder
|
||||
del _fi
|
||||
except (ImportError, AttributeError):
|
||||
pass
|
||||
|
||||
|
||||
def register_finder(loader, finder_maker):
|
||||
_finder_registry[type(loader)] = finder_maker
|
||||
|
||||
|
||||
_finder_cache = {}
|
||||
|
||||
|
||||
def finder(package):
|
||||
"""
|
||||
Return a resource finder for a package.
|
||||
:param package: The name of the package.
|
||||
:return: A :class:`ResourceFinder` instance for the package.
|
||||
"""
|
||||
if package in _finder_cache:
|
||||
result = _finder_cache[package]
|
||||
else:
|
||||
if package not in sys.modules:
|
||||
__import__(package)
|
||||
module = sys.modules[package]
|
||||
path = getattr(module, '__path__', None)
|
||||
if path is None:
|
||||
raise DistlibException('You cannot get a finder for a module, '
|
||||
'only for a package')
|
||||
loader = getattr(module, '__loader__', None)
|
||||
finder_maker = _finder_registry.get(type(loader))
|
||||
if finder_maker is None:
|
||||
raise DistlibException('Unable to locate finder for %r' % package)
|
||||
result = finder_maker(module)
|
||||
_finder_cache[package] = result
|
||||
return result
|
||||
|
||||
|
||||
_dummy_module = types.ModuleType(str('__dummy__'))
|
||||
|
||||
|
||||
def finder_for_path(path):
|
||||
"""
|
||||
Return a resource finder for a path, which should represent a container.
|
||||
|
||||
:param path: The path.
|
||||
:return: A :class:`ResourceFinder` instance for the path.
|
||||
"""
|
||||
result = None
|
||||
# calls any path hooks, gets importer into cache
|
||||
pkgutil.get_importer(path)
|
||||
loader = sys.path_importer_cache.get(path)
|
||||
finder = _finder_registry.get(type(loader))
|
||||
if finder:
|
||||
module = _dummy_module
|
||||
module.__file__ = os.path.join(path, '')
|
||||
module.__loader__ = loader
|
||||
result = finder(module)
|
||||
return result
|
||||
423
venv/Lib/site-packages/pip/_vendor/distlib/scripts.py
Normal file
423
venv/Lib/site-packages/pip/_vendor/distlib/scripts.py
Normal file
@@ -0,0 +1,423 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2013-2015 Vinay Sajip.
|
||||
# Licensed to the Python Software Foundation under a contributor agreement.
|
||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
||||
#
|
||||
from io import BytesIO
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import struct
|
||||
import sys
|
||||
|
||||
from .compat import sysconfig, detect_encoding, ZipFile
|
||||
from .resources import finder
|
||||
from .util import (FileOperator, get_export_entry, convert_path,
|
||||
get_executable, in_venv)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DEFAULT_MANIFEST = '''
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
|
||||
<assemblyIdentity version="1.0.0.0"
|
||||
processorArchitecture="X86"
|
||||
name="%s"
|
||||
type="win32"/>
|
||||
|
||||
<!-- Identify the application security requirements. -->
|
||||
<trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
|
||||
<security>
|
||||
<requestedPrivileges>
|
||||
<requestedExecutionLevel level="asInvoker" uiAccess="false"/>
|
||||
</requestedPrivileges>
|
||||
</security>
|
||||
</trustInfo>
|
||||
</assembly>'''.strip()
|
||||
|
||||
# check if Python is called on the first line with this expression
|
||||
FIRST_LINE_RE = re.compile(b'^#!.*pythonw?[0-9.]*([ \t].*)?$')
|
||||
SCRIPT_TEMPLATE = r'''# -*- coding: utf-8 -*-
|
||||
import re
|
||||
import sys
|
||||
from %(module)s import %(import_name)s
|
||||
if __name__ == '__main__':
|
||||
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
||||
sys.exit(%(func)s())
|
||||
'''
|
||||
|
||||
|
||||
def enquote_executable(executable):
|
||||
if ' ' in executable:
|
||||
# make sure we quote only the executable in case of env
|
||||
# for example /usr/bin/env "/dir with spaces/bin/jython"
|
||||
# instead of "/usr/bin/env /dir with spaces/bin/jython"
|
||||
# otherwise whole
|
||||
if executable.startswith('/usr/bin/env '):
|
||||
env, _executable = executable.split(' ', 1)
|
||||
if ' ' in _executable and not _executable.startswith('"'):
|
||||
executable = '%s "%s"' % (env, _executable)
|
||||
else:
|
||||
if not executable.startswith('"'):
|
||||
executable = '"%s"' % executable
|
||||
return executable
|
||||
|
||||
# Keep the old name around (for now), as there is at least one project using it!
|
||||
_enquote_executable = enquote_executable
|
||||
|
||||
class ScriptMaker(object):
|
||||
"""
|
||||
A class to copy or create scripts from source scripts or callable
|
||||
specifications.
|
||||
"""
|
||||
script_template = SCRIPT_TEMPLATE
|
||||
|
||||
executable = None # for shebangs
|
||||
|
||||
def __init__(self, source_dir, target_dir, add_launchers=True,
|
||||
dry_run=False, fileop=None):
|
||||
self.source_dir = source_dir
|
||||
self.target_dir = target_dir
|
||||
self.add_launchers = add_launchers
|
||||
self.force = False
|
||||
self.clobber = False
|
||||
# It only makes sense to set mode bits on POSIX.
|
||||
self.set_mode = (os.name == 'posix') or (os.name == 'java' and
|
||||
os._name == 'posix')
|
||||
self.variants = set(('', 'X.Y'))
|
||||
self._fileop = fileop or FileOperator(dry_run)
|
||||
|
||||
self._is_nt = os.name == 'nt' or (
|
||||
os.name == 'java' and os._name == 'nt')
|
||||
self.version_info = sys.version_info
|
||||
|
||||
def _get_alternate_executable(self, executable, options):
|
||||
if options.get('gui', False) and self._is_nt: # pragma: no cover
|
||||
dn, fn = os.path.split(executable)
|
||||
fn = fn.replace('python', 'pythonw')
|
||||
executable = os.path.join(dn, fn)
|
||||
return executable
|
||||
|
||||
if sys.platform.startswith('java'): # pragma: no cover
|
||||
def _is_shell(self, executable):
|
||||
"""
|
||||
Determine if the specified executable is a script
|
||||
(contains a #! line)
|
||||
"""
|
||||
try:
|
||||
with open(executable) as fp:
|
||||
return fp.read(2) == '#!'
|
||||
except (OSError, IOError):
|
||||
logger.warning('Failed to open %s', executable)
|
||||
return False
|
||||
|
||||
def _fix_jython_executable(self, executable):
|
||||
if self._is_shell(executable):
|
||||
# Workaround for Jython is not needed on Linux systems.
|
||||
import java
|
||||
|
||||
if java.lang.System.getProperty('os.name') == 'Linux':
|
||||
return executable
|
||||
elif executable.lower().endswith('jython.exe'):
|
||||
# Use wrapper exe for Jython on Windows
|
||||
return executable
|
||||
return '/usr/bin/env %s' % executable
|
||||
|
||||
def _build_shebang(self, executable, post_interp):
|
||||
"""
|
||||
Build a shebang line. In the simple case (on Windows, or a shebang line
|
||||
which is not too long or contains spaces) use a simple formulation for
|
||||
the shebang. Otherwise, use /bin/sh as the executable, with a contrived
|
||||
shebang which allows the script to run either under Python or sh, using
|
||||
suitable quoting. Thanks to Harald Nordgren for his input.
|
||||
|
||||
See also: http://www.in-ulm.de/~mascheck/various/shebang/#length
|
||||
https://hg.mozilla.org/mozilla-central/file/tip/mach
|
||||
"""
|
||||
if os.name != 'posix':
|
||||
simple_shebang = True
|
||||
else:
|
||||
# Add 3 for '#!' prefix and newline suffix.
|
||||
shebang_length = len(executable) + len(post_interp) + 3
|
||||
if sys.platform == 'darwin':
|
||||
max_shebang_length = 512
|
||||
else:
|
||||
max_shebang_length = 127
|
||||
simple_shebang = ((b' ' not in executable) and
|
||||
(shebang_length <= max_shebang_length))
|
||||
|
||||
if simple_shebang:
|
||||
result = b'#!' + executable + post_interp + b'\n'
|
||||
else:
|
||||
result = b'#!/bin/sh\n'
|
||||
result += b"'''exec' " + executable + post_interp + b' "$0" "$@"\n'
|
||||
result += b"' '''"
|
||||
return result
|
||||
|
||||
def _get_shebang(self, encoding, post_interp=b'', options=None):
|
||||
enquote = True
|
||||
if self.executable:
|
||||
executable = self.executable
|
||||
enquote = False # assume this will be taken care of
|
||||
elif not sysconfig.is_python_build():
|
||||
executable = get_executable()
|
||||
elif in_venv(): # pragma: no cover
|
||||
executable = os.path.join(sysconfig.get_path('scripts'),
|
||||
'python%s' % sysconfig.get_config_var('EXE'))
|
||||
else: # pragma: no cover
|
||||
executable = os.path.join(
|
||||
sysconfig.get_config_var('BINDIR'),
|
||||
'python%s%s' % (sysconfig.get_config_var('VERSION'),
|
||||
sysconfig.get_config_var('EXE')))
|
||||
if options:
|
||||
executable = self._get_alternate_executable(executable, options)
|
||||
|
||||
if sys.platform.startswith('java'): # pragma: no cover
|
||||
executable = self._fix_jython_executable(executable)
|
||||
|
||||
# Normalise case for Windows - COMMENTED OUT
|
||||
# executable = os.path.normcase(executable)
|
||||
# N.B. The normalising operation above has been commented out: See
|
||||
# issue #124. Although paths in Windows are generally case-insensitive,
|
||||
# they aren't always. For example, a path containing a ẞ (which is a
|
||||
# LATIN CAPITAL LETTER SHARP S - U+1E9E) is normcased to ß (which is a
|
||||
# LATIN SMALL LETTER SHARP S' - U+00DF). The two are not considered by
|
||||
# Windows as equivalent in path names.
|
||||
|
||||
# If the user didn't specify an executable, it may be necessary to
|
||||
# cater for executable paths with spaces (not uncommon on Windows)
|
||||
if enquote:
|
||||
executable = enquote_executable(executable)
|
||||
# Issue #51: don't use fsencode, since we later try to
|
||||
# check that the shebang is decodable using utf-8.
|
||||
executable = executable.encode('utf-8')
|
||||
# in case of IronPython, play safe and enable frames support
|
||||
if (sys.platform == 'cli' and '-X:Frames' not in post_interp
|
||||
and '-X:FullFrames' not in post_interp): # pragma: no cover
|
||||
post_interp += b' -X:Frames'
|
||||
shebang = self._build_shebang(executable, post_interp)
|
||||
# Python parser starts to read a script using UTF-8 until
|
||||
# it gets a #coding:xxx cookie. The shebang has to be the
|
||||
# first line of a file, the #coding:xxx cookie cannot be
|
||||
# written before. So the shebang has to be decodable from
|
||||
# UTF-8.
|
||||
try:
|
||||
shebang.decode('utf-8')
|
||||
except UnicodeDecodeError: # pragma: no cover
|
||||
raise ValueError(
|
||||
'The shebang (%r) is not decodable from utf-8' % shebang)
|
||||
# If the script is encoded to a custom encoding (use a
|
||||
# #coding:xxx cookie), the shebang has to be decodable from
|
||||
# the script encoding too.
|
||||
if encoding != 'utf-8':
|
||||
try:
|
||||
shebang.decode(encoding)
|
||||
except UnicodeDecodeError: # pragma: no cover
|
||||
raise ValueError(
|
||||
'The shebang (%r) is not decodable '
|
||||
'from the script encoding (%r)' % (shebang, encoding))
|
||||
return shebang
|
||||
|
||||
def _get_script_text(self, entry):
|
||||
return self.script_template % dict(module=entry.prefix,
|
||||
import_name=entry.suffix.split('.')[0],
|
||||
func=entry.suffix)
|
||||
|
||||
manifest = _DEFAULT_MANIFEST
|
||||
|
||||
def get_manifest(self, exename):
|
||||
base = os.path.basename(exename)
|
||||
return self.manifest % base
|
||||
|
||||
def _write_script(self, names, shebang, script_bytes, filenames, ext):
|
||||
use_launcher = self.add_launchers and self._is_nt
|
||||
linesep = os.linesep.encode('utf-8')
|
||||
if not shebang.endswith(linesep):
|
||||
shebang += linesep
|
||||
if not use_launcher:
|
||||
script_bytes = shebang + script_bytes
|
||||
else: # pragma: no cover
|
||||
if ext == 'py':
|
||||
launcher = self._get_launcher('t')
|
||||
else:
|
||||
launcher = self._get_launcher('w')
|
||||
stream = BytesIO()
|
||||
with ZipFile(stream, 'w') as zf:
|
||||
zf.writestr('__main__.py', script_bytes)
|
||||
zip_data = stream.getvalue()
|
||||
script_bytes = launcher + shebang + zip_data
|
||||
for name in names:
|
||||
outname = os.path.join(self.target_dir, name)
|
||||
if use_launcher: # pragma: no cover
|
||||
n, e = os.path.splitext(outname)
|
||||
if e.startswith('.py'):
|
||||
outname = n
|
||||
outname = '%s.exe' % outname
|
||||
try:
|
||||
self._fileop.write_binary_file(outname, script_bytes)
|
||||
except Exception:
|
||||
# Failed writing an executable - it might be in use.
|
||||
logger.warning('Failed to write executable - trying to '
|
||||
'use .deleteme logic')
|
||||
dfname = '%s.deleteme' % outname
|
||||
if os.path.exists(dfname):
|
||||
os.remove(dfname) # Not allowed to fail here
|
||||
os.rename(outname, dfname) # nor here
|
||||
self._fileop.write_binary_file(outname, script_bytes)
|
||||
logger.debug('Able to replace executable using '
|
||||
'.deleteme logic')
|
||||
try:
|
||||
os.remove(dfname)
|
||||
except Exception:
|
||||
pass # still in use - ignore error
|
||||
else:
|
||||
if self._is_nt and not outname.endswith('.' + ext): # pragma: no cover
|
||||
outname = '%s.%s' % (outname, ext)
|
||||
if os.path.exists(outname) and not self.clobber:
|
||||
logger.warning('Skipping existing file %s', outname)
|
||||
continue
|
||||
self._fileop.write_binary_file(outname, script_bytes)
|
||||
if self.set_mode:
|
||||
self._fileop.set_executable_mode([outname])
|
||||
filenames.append(outname)
|
||||
|
||||
variant_separator = '-'
|
||||
|
||||
def get_script_filenames(self, name):
|
||||
result = set()
|
||||
if '' in self.variants:
|
||||
result.add(name)
|
||||
if 'X' in self.variants:
|
||||
result.add('%s%s' % (name, self.version_info[0]))
|
||||
if 'X.Y' in self.variants:
|
||||
result.add('%s%s%s.%s' % (name, self.variant_separator,
|
||||
self.version_info[0], self.version_info[1]))
|
||||
return result
|
||||
|
||||
def _make_script(self, entry, filenames, options=None):
|
||||
post_interp = b''
|
||||
if options:
|
||||
args = options.get('interpreter_args', [])
|
||||
if args:
|
||||
args = ' %s' % ' '.join(args)
|
||||
post_interp = args.encode('utf-8')
|
||||
shebang = self._get_shebang('utf-8', post_interp, options=options)
|
||||
script = self._get_script_text(entry).encode('utf-8')
|
||||
scriptnames = self.get_script_filenames(entry.name)
|
||||
if options and options.get('gui', False):
|
||||
ext = 'pyw'
|
||||
else:
|
||||
ext = 'py'
|
||||
self._write_script(scriptnames, shebang, script, filenames, ext)
|
||||
|
||||
def _copy_script(self, script, filenames):
|
||||
adjust = False
|
||||
script = os.path.join(self.source_dir, convert_path(script))
|
||||
outname = os.path.join(self.target_dir, os.path.basename(script))
|
||||
if not self.force and not self._fileop.newer(script, outname):
|
||||
logger.debug('not copying %s (up-to-date)', script)
|
||||
return
|
||||
|
||||
# Always open the file, but ignore failures in dry-run mode --
|
||||
# that way, we'll get accurate feedback if we can read the
|
||||
# script.
|
||||
try:
|
||||
f = open(script, 'rb')
|
||||
except IOError: # pragma: no cover
|
||||
if not self.dry_run:
|
||||
raise
|
||||
f = None
|
||||
else:
|
||||
first_line = f.readline()
|
||||
if not first_line: # pragma: no cover
|
||||
logger.warning('%s is an empty file (skipping)', script)
|
||||
return
|
||||
|
||||
match = FIRST_LINE_RE.match(first_line.replace(b'\r\n', b'\n'))
|
||||
if match:
|
||||
adjust = True
|
||||
post_interp = match.group(1) or b''
|
||||
|
||||
if not adjust:
|
||||
if f:
|
||||
f.close()
|
||||
self._fileop.copy_file(script, outname)
|
||||
if self.set_mode:
|
||||
self._fileop.set_executable_mode([outname])
|
||||
filenames.append(outname)
|
||||
else:
|
||||
logger.info('copying and adjusting %s -> %s', script,
|
||||
self.target_dir)
|
||||
if not self._fileop.dry_run:
|
||||
encoding, lines = detect_encoding(f.readline)
|
||||
f.seek(0)
|
||||
shebang = self._get_shebang(encoding, post_interp)
|
||||
if b'pythonw' in first_line: # pragma: no cover
|
||||
ext = 'pyw'
|
||||
else:
|
||||
ext = 'py'
|
||||
n = os.path.basename(outname)
|
||||
self._write_script([n], shebang, f.read(), filenames, ext)
|
||||
if f:
|
||||
f.close()
|
||||
|
||||
@property
|
||||
def dry_run(self):
|
||||
return self._fileop.dry_run
|
||||
|
||||
@dry_run.setter
|
||||
def dry_run(self, value):
|
||||
self._fileop.dry_run = value
|
||||
|
||||
if os.name == 'nt' or (os.name == 'java' and os._name == 'nt'): # pragma: no cover
|
||||
# Executable launcher support.
|
||||
# Launchers are from https://bitbucket.org/vinay.sajip/simple_launcher/
|
||||
|
||||
def _get_launcher(self, kind):
|
||||
if struct.calcsize('P') == 8: # 64-bit
|
||||
bits = '64'
|
||||
else:
|
||||
bits = '32'
|
||||
name = '%s%s.exe' % (kind, bits)
|
||||
# Issue 31: don't hardcode an absolute package name, but
|
||||
# determine it relative to the current package
|
||||
distlib_package = __name__.rsplit('.', 1)[0]
|
||||
resource = finder(distlib_package).find(name)
|
||||
if not resource:
|
||||
msg = ('Unable to find resource %s in package %s' % (name,
|
||||
distlib_package))
|
||||
raise ValueError(msg)
|
||||
return resource.bytes
|
||||
|
||||
# Public API follows
|
||||
|
||||
def make(self, specification, options=None):
|
||||
"""
|
||||
Make a script.
|
||||
|
||||
:param specification: The specification, which is either a valid export
|
||||
entry specification (to make a script from a
|
||||
callable) or a filename (to make a script by
|
||||
copying from a source location).
|
||||
:param options: A dictionary of options controlling script generation.
|
||||
:return: A list of all absolute pathnames written to.
|
||||
"""
|
||||
filenames = []
|
||||
entry = get_export_entry(specification)
|
||||
if entry is None:
|
||||
self._copy_script(specification, filenames)
|
||||
else:
|
||||
self._make_script(entry, filenames, options=options)
|
||||
return filenames
|
||||
|
||||
def make_multiple(self, specifications, options=None):
|
||||
"""
|
||||
Take a list of specifications and make scripts from them,
|
||||
:param specifications: A list of specifications.
|
||||
:return: A list of all absolute pathnames written to,
|
||||
"""
|
||||
filenames = []
|
||||
for specification in specifications:
|
||||
filenames.extend(self.make(specification, options))
|
||||
return filenames
|
||||
BIN
venv/Lib/site-packages/pip/_vendor/distlib/t32.exe
Normal file
BIN
venv/Lib/site-packages/pip/_vendor/distlib/t32.exe
Normal file
Binary file not shown.
BIN
venv/Lib/site-packages/pip/_vendor/distlib/t64.exe
Normal file
BIN
venv/Lib/site-packages/pip/_vendor/distlib/t64.exe
Normal file
Binary file not shown.
1965
venv/Lib/site-packages/pip/_vendor/distlib/util.py
Normal file
1965
venv/Lib/site-packages/pip/_vendor/distlib/util.py
Normal file
File diff suppressed because it is too large
Load Diff
739
venv/Lib/site-packages/pip/_vendor/distlib/version.py
Normal file
739
venv/Lib/site-packages/pip/_vendor/distlib/version.py
Normal file
@@ -0,0 +1,739 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2012-2017 The Python Software Foundation.
|
||||
# See LICENSE.txt and CONTRIBUTORS.txt.
|
||||
#
|
||||
"""
|
||||
Implementation of a flexible versioning scheme providing support for PEP-440,
|
||||
setuptools-compatible and semantic versioning.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
|
||||
from .compat import string_types
|
||||
from .util import parse_requirement
|
||||
|
||||
__all__ = ['NormalizedVersion', 'NormalizedMatcher',
|
||||
'LegacyVersion', 'LegacyMatcher',
|
||||
'SemanticVersion', 'SemanticMatcher',
|
||||
'UnsupportedVersionError', 'get_scheme']
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class UnsupportedVersionError(ValueError):
|
||||
"""This is an unsupported version."""
|
||||
pass
|
||||
|
||||
|
||||
class Version(object):
|
||||
def __init__(self, s):
|
||||
self._string = s = s.strip()
|
||||
self._parts = parts = self.parse(s)
|
||||
assert isinstance(parts, tuple)
|
||||
assert len(parts) > 0
|
||||
|
||||
def parse(self, s):
|
||||
raise NotImplementedError('please implement in a subclass')
|
||||
|
||||
def _check_compatible(self, other):
|
||||
if type(self) != type(other):
|
||||
raise TypeError('cannot compare %r and %r' % (self, other))
|
||||
|
||||
def __eq__(self, other):
|
||||
self._check_compatible(other)
|
||||
return self._parts == other._parts
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self.__eq__(other)
|
||||
|
||||
def __lt__(self, other):
|
||||
self._check_compatible(other)
|
||||
return self._parts < other._parts
|
||||
|
||||
def __gt__(self, other):
|
||||
return not (self.__lt__(other) or self.__eq__(other))
|
||||
|
||||
def __le__(self, other):
|
||||
return self.__lt__(other) or self.__eq__(other)
|
||||
|
||||
def __ge__(self, other):
|
||||
return self.__gt__(other) or self.__eq__(other)
|
||||
|
||||
# See http://docs.python.org/reference/datamodel#object.__hash__
|
||||
def __hash__(self):
|
||||
return hash(self._parts)
|
||||
|
||||
def __repr__(self):
|
||||
return "%s('%s')" % (self.__class__.__name__, self._string)
|
||||
|
||||
def __str__(self):
|
||||
return self._string
|
||||
|
||||
@property
|
||||
def is_prerelease(self):
|
||||
raise NotImplementedError('Please implement in subclasses.')
|
||||
|
||||
|
||||
class Matcher(object):
|
||||
version_class = None
|
||||
|
||||
# value is either a callable or the name of a method
|
||||
_operators = {
|
||||
'<': lambda v, c, p: v < c,
|
||||
'>': lambda v, c, p: v > c,
|
||||
'<=': lambda v, c, p: v == c or v < c,
|
||||
'>=': lambda v, c, p: v == c or v > c,
|
||||
'==': lambda v, c, p: v == c,
|
||||
'===': lambda v, c, p: v == c,
|
||||
# by default, compatible => >=.
|
||||
'~=': lambda v, c, p: v == c or v > c,
|
||||
'!=': lambda v, c, p: v != c,
|
||||
}
|
||||
|
||||
# this is a method only to support alternative implementations
|
||||
# via overriding
|
||||
def parse_requirement(self, s):
|
||||
return parse_requirement(s)
|
||||
|
||||
def __init__(self, s):
|
||||
if self.version_class is None:
|
||||
raise ValueError('Please specify a version class')
|
||||
self._string = s = s.strip()
|
||||
r = self.parse_requirement(s)
|
||||
if not r:
|
||||
raise ValueError('Not valid: %r' % s)
|
||||
self.name = r.name
|
||||
self.key = self.name.lower() # for case-insensitive comparisons
|
||||
clist = []
|
||||
if r.constraints:
|
||||
# import pdb; pdb.set_trace()
|
||||
for op, s in r.constraints:
|
||||
if s.endswith('.*'):
|
||||
if op not in ('==', '!='):
|
||||
raise ValueError('\'.*\' not allowed for '
|
||||
'%r constraints' % op)
|
||||
# Could be a partial version (e.g. for '2.*') which
|
||||
# won't parse as a version, so keep it as a string
|
||||
vn, prefix = s[:-2], True
|
||||
# Just to check that vn is a valid version
|
||||
self.version_class(vn)
|
||||
else:
|
||||
# Should parse as a version, so we can create an
|
||||
# instance for the comparison
|
||||
vn, prefix = self.version_class(s), False
|
||||
clist.append((op, vn, prefix))
|
||||
self._parts = tuple(clist)
|
||||
|
||||
def match(self, version):
|
||||
"""
|
||||
Check if the provided version matches the constraints.
|
||||
|
||||
:param version: The version to match against this instance.
|
||||
:type version: String or :class:`Version` instance.
|
||||
"""
|
||||
if isinstance(version, string_types):
|
||||
version = self.version_class(version)
|
||||
for operator, constraint, prefix in self._parts:
|
||||
f = self._operators.get(operator)
|
||||
if isinstance(f, string_types):
|
||||
f = getattr(self, f)
|
||||
if not f:
|
||||
msg = ('%r not implemented '
|
||||
'for %s' % (operator, self.__class__.__name__))
|
||||
raise NotImplementedError(msg)
|
||||
if not f(version, constraint, prefix):
|
||||
return False
|
||||
return True
|
||||
|
||||
@property
|
||||
def exact_version(self):
|
||||
result = None
|
||||
if len(self._parts) == 1 and self._parts[0][0] in ('==', '==='):
|
||||
result = self._parts[0][1]
|
||||
return result
|
||||
|
||||
def _check_compatible(self, other):
|
||||
if type(self) != type(other) or self.name != other.name:
|
||||
raise TypeError('cannot compare %s and %s' % (self, other))
|
||||
|
||||
def __eq__(self, other):
|
||||
self._check_compatible(other)
|
||||
return self.key == other.key and self._parts == other._parts
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self.__eq__(other)
|
||||
|
||||
# See http://docs.python.org/reference/datamodel#object.__hash__
|
||||
def __hash__(self):
|
||||
return hash(self.key) + hash(self._parts)
|
||||
|
||||
def __repr__(self):
|
||||
return "%s(%r)" % (self.__class__.__name__, self._string)
|
||||
|
||||
def __str__(self):
|
||||
return self._string
|
||||
|
||||
|
||||
PEP440_VERSION_RE = re.compile(r'^v?(\d+!)?(\d+(\.\d+)*)((a|b|c|rc)(\d+))?'
|
||||
r'(\.(post)(\d+))?(\.(dev)(\d+))?'
|
||||
r'(\+([a-zA-Z\d]+(\.[a-zA-Z\d]+)?))?$')
|
||||
|
||||
|
||||
def _pep_440_key(s):
|
||||
s = s.strip()
|
||||
m = PEP440_VERSION_RE.match(s)
|
||||
if not m:
|
||||
raise UnsupportedVersionError('Not a valid version: %s' % s)
|
||||
groups = m.groups()
|
||||
nums = tuple(int(v) for v in groups[1].split('.'))
|
||||
while len(nums) > 1 and nums[-1] == 0:
|
||||
nums = nums[:-1]
|
||||
|
||||
if not groups[0]:
|
||||
epoch = 0
|
||||
else:
|
||||
epoch = int(groups[0])
|
||||
pre = groups[4:6]
|
||||
post = groups[7:9]
|
||||
dev = groups[10:12]
|
||||
local = groups[13]
|
||||
if pre == (None, None):
|
||||
pre = ()
|
||||
else:
|
||||
pre = pre[0], int(pre[1])
|
||||
if post == (None, None):
|
||||
post = ()
|
||||
else:
|
||||
post = post[0], int(post[1])
|
||||
if dev == (None, None):
|
||||
dev = ()
|
||||
else:
|
||||
dev = dev[0], int(dev[1])
|
||||
if local is None:
|
||||
local = ()
|
||||
else:
|
||||
parts = []
|
||||
for part in local.split('.'):
|
||||
# to ensure that numeric compares as > lexicographic, avoid
|
||||
# comparing them directly, but encode a tuple which ensures
|
||||
# correct sorting
|
||||
if part.isdigit():
|
||||
part = (1, int(part))
|
||||
else:
|
||||
part = (0, part)
|
||||
parts.append(part)
|
||||
local = tuple(parts)
|
||||
if not pre:
|
||||
# either before pre-release, or final release and after
|
||||
if not post and dev:
|
||||
# before pre-release
|
||||
pre = ('a', -1) # to sort before a0
|
||||
else:
|
||||
pre = ('z',) # to sort after all pre-releases
|
||||
# now look at the state of post and dev.
|
||||
if not post:
|
||||
post = ('_',) # sort before 'a'
|
||||
if not dev:
|
||||
dev = ('final',)
|
||||
|
||||
#print('%s -> %s' % (s, m.groups()))
|
||||
return epoch, nums, pre, post, dev, local
|
||||
|
||||
|
||||
_normalized_key = _pep_440_key
|
||||
|
||||
|
||||
class NormalizedVersion(Version):
|
||||
"""A rational version.
|
||||
|
||||
Good:
|
||||
1.2 # equivalent to "1.2.0"
|
||||
1.2.0
|
||||
1.2a1
|
||||
1.2.3a2
|
||||
1.2.3b1
|
||||
1.2.3c1
|
||||
1.2.3.4
|
||||
TODO: fill this out
|
||||
|
||||
Bad:
|
||||
1 # minimum two numbers
|
||||
1.2a # release level must have a release serial
|
||||
1.2.3b
|
||||
"""
|
||||
def parse(self, s):
|
||||
result = _normalized_key(s)
|
||||
# _normalized_key loses trailing zeroes in the release
|
||||
# clause, since that's needed to ensure that X.Y == X.Y.0 == X.Y.0.0
|
||||
# However, PEP 440 prefix matching needs it: for example,
|
||||
# (~= 1.4.5.0) matches differently to (~= 1.4.5.0.0).
|
||||
m = PEP440_VERSION_RE.match(s) # must succeed
|
||||
groups = m.groups()
|
||||
self._release_clause = tuple(int(v) for v in groups[1].split('.'))
|
||||
return result
|
||||
|
||||
PREREL_TAGS = set(['a', 'b', 'c', 'rc', 'dev'])
|
||||
|
||||
@property
|
||||
def is_prerelease(self):
|
||||
return any(t[0] in self.PREREL_TAGS for t in self._parts if t)
|
||||
|
||||
|
||||
def _match_prefix(x, y):
|
||||
x = str(x)
|
||||
y = str(y)
|
||||
if x == y:
|
||||
return True
|
||||
if not x.startswith(y):
|
||||
return False
|
||||
n = len(y)
|
||||
return x[n] == '.'
|
||||
|
||||
|
||||
class NormalizedMatcher(Matcher):
|
||||
version_class = NormalizedVersion
|
||||
|
||||
# value is either a callable or the name of a method
|
||||
_operators = {
|
||||
'~=': '_match_compatible',
|
||||
'<': '_match_lt',
|
||||
'>': '_match_gt',
|
||||
'<=': '_match_le',
|
||||
'>=': '_match_ge',
|
||||
'==': '_match_eq',
|
||||
'===': '_match_arbitrary',
|
||||
'!=': '_match_ne',
|
||||
}
|
||||
|
||||
def _adjust_local(self, version, constraint, prefix):
|
||||
if prefix:
|
||||
strip_local = '+' not in constraint and version._parts[-1]
|
||||
else:
|
||||
# both constraint and version are
|
||||
# NormalizedVersion instances.
|
||||
# If constraint does not have a local component,
|
||||
# ensure the version doesn't, either.
|
||||
strip_local = not constraint._parts[-1] and version._parts[-1]
|
||||
if strip_local:
|
||||
s = version._string.split('+', 1)[0]
|
||||
version = self.version_class(s)
|
||||
return version, constraint
|
||||
|
||||
def _match_lt(self, version, constraint, prefix):
|
||||
version, constraint = self._adjust_local(version, constraint, prefix)
|
||||
if version >= constraint:
|
||||
return False
|
||||
release_clause = constraint._release_clause
|
||||
pfx = '.'.join([str(i) for i in release_clause])
|
||||
return not _match_prefix(version, pfx)
|
||||
|
||||
def _match_gt(self, version, constraint, prefix):
|
||||
version, constraint = self._adjust_local(version, constraint, prefix)
|
||||
if version <= constraint:
|
||||
return False
|
||||
release_clause = constraint._release_clause
|
||||
pfx = '.'.join([str(i) for i in release_clause])
|
||||
return not _match_prefix(version, pfx)
|
||||
|
||||
def _match_le(self, version, constraint, prefix):
|
||||
version, constraint = self._adjust_local(version, constraint, prefix)
|
||||
return version <= constraint
|
||||
|
||||
def _match_ge(self, version, constraint, prefix):
|
||||
version, constraint = self._adjust_local(version, constraint, prefix)
|
||||
return version >= constraint
|
||||
|
||||
def _match_eq(self, version, constraint, prefix):
|
||||
version, constraint = self._adjust_local(version, constraint, prefix)
|
||||
if not prefix:
|
||||
result = (version == constraint)
|
||||
else:
|
||||
result = _match_prefix(version, constraint)
|
||||
return result
|
||||
|
||||
def _match_arbitrary(self, version, constraint, prefix):
|
||||
return str(version) == str(constraint)
|
||||
|
||||
def _match_ne(self, version, constraint, prefix):
|
||||
version, constraint = self._adjust_local(version, constraint, prefix)
|
||||
if not prefix:
|
||||
result = (version != constraint)
|
||||
else:
|
||||
result = not _match_prefix(version, constraint)
|
||||
return result
|
||||
|
||||
def _match_compatible(self, version, constraint, prefix):
|
||||
version, constraint = self._adjust_local(version, constraint, prefix)
|
||||
if version == constraint:
|
||||
return True
|
||||
if version < constraint:
|
||||
return False
|
||||
# if not prefix:
|
||||
# return True
|
||||
release_clause = constraint._release_clause
|
||||
if len(release_clause) > 1:
|
||||
release_clause = release_clause[:-1]
|
||||
pfx = '.'.join([str(i) for i in release_clause])
|
||||
return _match_prefix(version, pfx)
|
||||
|
||||
_REPLACEMENTS = (
|
||||
(re.compile('[.+-]$'), ''), # remove trailing puncts
|
||||
(re.compile(r'^[.](\d)'), r'0.\1'), # .N -> 0.N at start
|
||||
(re.compile('^[.-]'), ''), # remove leading puncts
|
||||
(re.compile(r'^\((.*)\)$'), r'\1'), # remove parentheses
|
||||
(re.compile(r'^v(ersion)?\s*(\d+)'), r'\2'), # remove leading v(ersion)
|
||||
(re.compile(r'^r(ev)?\s*(\d+)'), r'\2'), # remove leading v(ersion)
|
||||
(re.compile('[.]{2,}'), '.'), # multiple runs of '.'
|
||||
(re.compile(r'\b(alfa|apha)\b'), 'alpha'), # misspelt alpha
|
||||
(re.compile(r'\b(pre-alpha|prealpha)\b'),
|
||||
'pre.alpha'), # standardise
|
||||
(re.compile(r'\(beta\)$'), 'beta'), # remove parentheses
|
||||
)
|
||||
|
||||
_SUFFIX_REPLACEMENTS = (
|
||||
(re.compile('^[:~._+-]+'), ''), # remove leading puncts
|
||||
(re.compile('[,*")([\\]]'), ''), # remove unwanted chars
|
||||
(re.compile('[~:+_ -]'), '.'), # replace illegal chars
|
||||
(re.compile('[.]{2,}'), '.'), # multiple runs of '.'
|
||||
(re.compile(r'\.$'), ''), # trailing '.'
|
||||
)
|
||||
|
||||
_NUMERIC_PREFIX = re.compile(r'(\d+(\.\d+)*)')
|
||||
|
||||
|
||||
def _suggest_semantic_version(s):
|
||||
"""
|
||||
Try to suggest a semantic form for a version for which
|
||||
_suggest_normalized_version couldn't come up with anything.
|
||||
"""
|
||||
result = s.strip().lower()
|
||||
for pat, repl in _REPLACEMENTS:
|
||||
result = pat.sub(repl, result)
|
||||
if not result:
|
||||
result = '0.0.0'
|
||||
|
||||
# Now look for numeric prefix, and separate it out from
|
||||
# the rest.
|
||||
#import pdb; pdb.set_trace()
|
||||
m = _NUMERIC_PREFIX.match(result)
|
||||
if not m:
|
||||
prefix = '0.0.0'
|
||||
suffix = result
|
||||
else:
|
||||
prefix = m.groups()[0].split('.')
|
||||
prefix = [int(i) for i in prefix]
|
||||
while len(prefix) < 3:
|
||||
prefix.append(0)
|
||||
if len(prefix) == 3:
|
||||
suffix = result[m.end():]
|
||||
else:
|
||||
suffix = '.'.join([str(i) for i in prefix[3:]]) + result[m.end():]
|
||||
prefix = prefix[:3]
|
||||
prefix = '.'.join([str(i) for i in prefix])
|
||||
suffix = suffix.strip()
|
||||
if suffix:
|
||||
#import pdb; pdb.set_trace()
|
||||
# massage the suffix.
|
||||
for pat, repl in _SUFFIX_REPLACEMENTS:
|
||||
suffix = pat.sub(repl, suffix)
|
||||
|
||||
if not suffix:
|
||||
result = prefix
|
||||
else:
|
||||
sep = '-' if 'dev' in suffix else '+'
|
||||
result = prefix + sep + suffix
|
||||
if not is_semver(result):
|
||||
result = None
|
||||
return result
|
||||
|
||||
|
||||
def _suggest_normalized_version(s):
|
||||
"""Suggest a normalized version close to the given version string.
|
||||
|
||||
If you have a version string that isn't rational (i.e. NormalizedVersion
|
||||
doesn't like it) then you might be able to get an equivalent (or close)
|
||||
rational version from this function.
|
||||
|
||||
This does a number of simple normalizations to the given string, based
|
||||
on observation of versions currently in use on PyPI. Given a dump of
|
||||
those version during PyCon 2009, 4287 of them:
|
||||
- 2312 (53.93%) match NormalizedVersion without change
|
||||
with the automatic suggestion
|
||||
- 3474 (81.04%) match when using this suggestion method
|
||||
|
||||
@param s {str} An irrational version string.
|
||||
@returns A rational version string, or None, if couldn't determine one.
|
||||
"""
|
||||
try:
|
||||
_normalized_key(s)
|
||||
return s # already rational
|
||||
except UnsupportedVersionError:
|
||||
pass
|
||||
|
||||
rs = s.lower()
|
||||
|
||||
# part of this could use maketrans
|
||||
for orig, repl in (('-alpha', 'a'), ('-beta', 'b'), ('alpha', 'a'),
|
||||
('beta', 'b'), ('rc', 'c'), ('-final', ''),
|
||||
('-pre', 'c'),
|
||||
('-release', ''), ('.release', ''), ('-stable', ''),
|
||||
('+', '.'), ('_', '.'), (' ', ''), ('.final', ''),
|
||||
('final', '')):
|
||||
rs = rs.replace(orig, repl)
|
||||
|
||||
# if something ends with dev or pre, we add a 0
|
||||
rs = re.sub(r"pre$", r"pre0", rs)
|
||||
rs = re.sub(r"dev$", r"dev0", rs)
|
||||
|
||||
# if we have something like "b-2" or "a.2" at the end of the
|
||||
# version, that is probably beta, alpha, etc
|
||||
# let's remove the dash or dot
|
||||
rs = re.sub(r"([abc]|rc)[\-\.](\d+)$", r"\1\2", rs)
|
||||
|
||||
# 1.0-dev-r371 -> 1.0.dev371
|
||||
# 0.1-dev-r79 -> 0.1.dev79
|
||||
rs = re.sub(r"[\-\.](dev)[\-\.]?r?(\d+)$", r".\1\2", rs)
|
||||
|
||||
# Clean: 2.0.a.3, 2.0.b1, 0.9.0~c1
|
||||
rs = re.sub(r"[.~]?([abc])\.?", r"\1", rs)
|
||||
|
||||
# Clean: v0.3, v1.0
|
||||
if rs.startswith('v'):
|
||||
rs = rs[1:]
|
||||
|
||||
# Clean leading '0's on numbers.
|
||||
#TODO: unintended side-effect on, e.g., "2003.05.09"
|
||||
# PyPI stats: 77 (~2%) better
|
||||
rs = re.sub(r"\b0+(\d+)(?!\d)", r"\1", rs)
|
||||
|
||||
# Clean a/b/c with no version. E.g. "1.0a" -> "1.0a0". Setuptools infers
|
||||
# zero.
|
||||
# PyPI stats: 245 (7.56%) better
|
||||
rs = re.sub(r"(\d+[abc])$", r"\g<1>0", rs)
|
||||
|
||||
# the 'dev-rNNN' tag is a dev tag
|
||||
rs = re.sub(r"\.?(dev-r|dev\.r)\.?(\d+)$", r".dev\2", rs)
|
||||
|
||||
# clean the - when used as a pre delimiter
|
||||
rs = re.sub(r"-(a|b|c)(\d+)$", r"\1\2", rs)
|
||||
|
||||
# a terminal "dev" or "devel" can be changed into ".dev0"
|
||||
rs = re.sub(r"[\.\-](dev|devel)$", r".dev0", rs)
|
||||
|
||||
# a terminal "dev" can be changed into ".dev0"
|
||||
rs = re.sub(r"(?![\.\-])dev$", r".dev0", rs)
|
||||
|
||||
# a terminal "final" or "stable" can be removed
|
||||
rs = re.sub(r"(final|stable)$", "", rs)
|
||||
|
||||
# The 'r' and the '-' tags are post release tags
|
||||
# 0.4a1.r10 -> 0.4a1.post10
|
||||
# 0.9.33-17222 -> 0.9.33.post17222
|
||||
# 0.9.33-r17222 -> 0.9.33.post17222
|
||||
rs = re.sub(r"\.?(r|-|-r)\.?(\d+)$", r".post\2", rs)
|
||||
|
||||
# Clean 'r' instead of 'dev' usage:
|
||||
# 0.9.33+r17222 -> 0.9.33.dev17222
|
||||
# 1.0dev123 -> 1.0.dev123
|
||||
# 1.0.git123 -> 1.0.dev123
|
||||
# 1.0.bzr123 -> 1.0.dev123
|
||||
# 0.1a0dev.123 -> 0.1a0.dev123
|
||||
# PyPI stats: ~150 (~4%) better
|
||||
rs = re.sub(r"\.?(dev|git|bzr)\.?(\d+)$", r".dev\2", rs)
|
||||
|
||||
# Clean '.pre' (normalized from '-pre' above) instead of 'c' usage:
|
||||
# 0.2.pre1 -> 0.2c1
|
||||
# 0.2-c1 -> 0.2c1
|
||||
# 1.0preview123 -> 1.0c123
|
||||
# PyPI stats: ~21 (0.62%) better
|
||||
rs = re.sub(r"\.?(pre|preview|-c)(\d+)$", r"c\g<2>", rs)
|
||||
|
||||
# Tcl/Tk uses "px" for their post release markers
|
||||
rs = re.sub(r"p(\d+)$", r".post\1", rs)
|
||||
|
||||
try:
|
||||
_normalized_key(rs)
|
||||
except UnsupportedVersionError:
|
||||
rs = None
|
||||
return rs
|
||||
|
||||
#
|
||||
# Legacy version processing (distribute-compatible)
|
||||
#
|
||||
|
||||
_VERSION_PART = re.compile(r'([a-z]+|\d+|[\.-])', re.I)
|
||||
_VERSION_REPLACE = {
|
||||
'pre': 'c',
|
||||
'preview': 'c',
|
||||
'-': 'final-',
|
||||
'rc': 'c',
|
||||
'dev': '@',
|
||||
'': None,
|
||||
'.': None,
|
||||
}
|
||||
|
||||
|
||||
def _legacy_key(s):
|
||||
def get_parts(s):
|
||||
result = []
|
||||
for p in _VERSION_PART.split(s.lower()):
|
||||
p = _VERSION_REPLACE.get(p, p)
|
||||
if p:
|
||||
if '0' <= p[:1] <= '9':
|
||||
p = p.zfill(8)
|
||||
else:
|
||||
p = '*' + p
|
||||
result.append(p)
|
||||
result.append('*final')
|
||||
return result
|
||||
|
||||
result = []
|
||||
for p in get_parts(s):
|
||||
if p.startswith('*'):
|
||||
if p < '*final':
|
||||
while result and result[-1] == '*final-':
|
||||
result.pop()
|
||||
while result and result[-1] == '00000000':
|
||||
result.pop()
|
||||
result.append(p)
|
||||
return tuple(result)
|
||||
|
||||
|
||||
class LegacyVersion(Version):
|
||||
def parse(self, s):
|
||||
return _legacy_key(s)
|
||||
|
||||
@property
|
||||
def is_prerelease(self):
|
||||
result = False
|
||||
for x in self._parts:
|
||||
if (isinstance(x, string_types) and x.startswith('*') and
|
||||
x < '*final'):
|
||||
result = True
|
||||
break
|
||||
return result
|
||||
|
||||
|
||||
class LegacyMatcher(Matcher):
|
||||
version_class = LegacyVersion
|
||||
|
||||
_operators = dict(Matcher._operators)
|
||||
_operators['~='] = '_match_compatible'
|
||||
|
||||
numeric_re = re.compile(r'^(\d+(\.\d+)*)')
|
||||
|
||||
def _match_compatible(self, version, constraint, prefix):
|
||||
if version < constraint:
|
||||
return False
|
||||
m = self.numeric_re.match(str(constraint))
|
||||
if not m:
|
||||
logger.warning('Cannot compute compatible match for version %s '
|
||||
' and constraint %s', version, constraint)
|
||||
return True
|
||||
s = m.groups()[0]
|
||||
if '.' in s:
|
||||
s = s.rsplit('.', 1)[0]
|
||||
return _match_prefix(version, s)
|
||||
|
||||
#
|
||||
# Semantic versioning
|
||||
#
|
||||
|
||||
_SEMVER_RE = re.compile(r'^(\d+)\.(\d+)\.(\d+)'
|
||||
r'(-[a-z0-9]+(\.[a-z0-9-]+)*)?'
|
||||
r'(\+[a-z0-9]+(\.[a-z0-9-]+)*)?$', re.I)
|
||||
|
||||
|
||||
def is_semver(s):
|
||||
return _SEMVER_RE.match(s)
|
||||
|
||||
|
||||
def _semantic_key(s):
|
||||
def make_tuple(s, absent):
|
||||
if s is None:
|
||||
result = (absent,)
|
||||
else:
|
||||
parts = s[1:].split('.')
|
||||
# We can't compare ints and strings on Python 3, so fudge it
|
||||
# by zero-filling numeric values so simulate a numeric comparison
|
||||
result = tuple([p.zfill(8) if p.isdigit() else p for p in parts])
|
||||
return result
|
||||
|
||||
m = is_semver(s)
|
||||
if not m:
|
||||
raise UnsupportedVersionError(s)
|
||||
groups = m.groups()
|
||||
major, minor, patch = [int(i) for i in groups[:3]]
|
||||
# choose the '|' and '*' so that versions sort correctly
|
||||
pre, build = make_tuple(groups[3], '|'), make_tuple(groups[5], '*')
|
||||
return (major, minor, patch), pre, build
|
||||
|
||||
|
||||
class SemanticVersion(Version):
|
||||
def parse(self, s):
|
||||
return _semantic_key(s)
|
||||
|
||||
@property
|
||||
def is_prerelease(self):
|
||||
return self._parts[1][0] != '|'
|
||||
|
||||
|
||||
class SemanticMatcher(Matcher):
|
||||
version_class = SemanticVersion
|
||||
|
||||
|
||||
class VersionScheme(object):
|
||||
def __init__(self, key, matcher, suggester=None):
|
||||
self.key = key
|
||||
self.matcher = matcher
|
||||
self.suggester = suggester
|
||||
|
||||
def is_valid_version(self, s):
|
||||
try:
|
||||
self.matcher.version_class(s)
|
||||
result = True
|
||||
except UnsupportedVersionError:
|
||||
result = False
|
||||
return result
|
||||
|
||||
def is_valid_matcher(self, s):
|
||||
try:
|
||||
self.matcher(s)
|
||||
result = True
|
||||
except UnsupportedVersionError:
|
||||
result = False
|
||||
return result
|
||||
|
||||
def is_valid_constraint_list(self, s):
|
||||
"""
|
||||
Used for processing some metadata fields
|
||||
"""
|
||||
# See issue #140. Be tolerant of a single trailing comma.
|
||||
if s.endswith(','):
|
||||
s = s[:-1]
|
||||
return self.is_valid_matcher('dummy_name (%s)' % s)
|
||||
|
||||
def suggest(self, s):
|
||||
if self.suggester is None:
|
||||
result = None
|
||||
else:
|
||||
result = self.suggester(s)
|
||||
return result
|
||||
|
||||
_SCHEMES = {
|
||||
'normalized': VersionScheme(_normalized_key, NormalizedMatcher,
|
||||
_suggest_normalized_version),
|
||||
'legacy': VersionScheme(_legacy_key, LegacyMatcher, lambda self, s: s),
|
||||
'semantic': VersionScheme(_semantic_key, SemanticMatcher,
|
||||
_suggest_semantic_version),
|
||||
}
|
||||
|
||||
_SCHEMES['default'] = _SCHEMES['normalized']
|
||||
|
||||
|
||||
def get_scheme(name):
|
||||
if name not in _SCHEMES:
|
||||
raise ValueError('unknown scheme name: %r' % name)
|
||||
return _SCHEMES[name]
|
||||
BIN
venv/Lib/site-packages/pip/_vendor/distlib/w32.exe
Normal file
BIN
venv/Lib/site-packages/pip/_vendor/distlib/w32.exe
Normal file
Binary file not shown.
BIN
venv/Lib/site-packages/pip/_vendor/distlib/w64.exe
Normal file
BIN
venv/Lib/site-packages/pip/_vendor/distlib/w64.exe
Normal file
Binary file not shown.
1056
venv/Lib/site-packages/pip/_vendor/distlib/wheel.py
Normal file
1056
venv/Lib/site-packages/pip/_vendor/distlib/wheel.py
Normal file
File diff suppressed because it is too large
Load Diff
1230
venv/Lib/site-packages/pip/_vendor/distro.py
Normal file
1230
venv/Lib/site-packages/pip/_vendor/distro.py
Normal file
File diff suppressed because it is too large
Load Diff
35
venv/Lib/site-packages/pip/_vendor/html5lib/__init__.py
Normal file
35
venv/Lib/site-packages/pip/_vendor/html5lib/__init__.py
Normal file
@@ -0,0 +1,35 @@
|
||||
"""
|
||||
HTML parsing library based on the `WHATWG HTML specification
|
||||
<https://whatwg.org/html>`_. The parser is designed to be compatible with
|
||||
existing HTML found in the wild and implements well-defined error recovery that
|
||||
is largely compatible with modern desktop web browsers.
|
||||
|
||||
Example usage::
|
||||
|
||||
from pip._vendor import html5lib
|
||||
with open("my_document.html", "rb") as f:
|
||||
tree = html5lib.parse(f)
|
||||
|
||||
For convenience, this module re-exports the following names:
|
||||
|
||||
* :func:`~.html5parser.parse`
|
||||
* :func:`~.html5parser.parseFragment`
|
||||
* :class:`~.html5parser.HTMLParser`
|
||||
* :func:`~.treebuilders.getTreeBuilder`
|
||||
* :func:`~.treewalkers.getTreeWalker`
|
||||
* :func:`~.serializer.serialize`
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from .html5parser import HTMLParser, parse, parseFragment
|
||||
from .treebuilders import getTreeBuilder
|
||||
from .treewalkers import getTreeWalker
|
||||
from .serializer import serialize
|
||||
|
||||
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
|
||||
"getTreeWalker", "serialize"]
|
||||
|
||||
# this has to be at the top level, see how setup.py parses this
|
||||
#: Distribution version number.
|
||||
__version__ = "1.1"
|
||||
289
venv/Lib/site-packages/pip/_vendor/html5lib/_ihatexml.py
Normal file
289
venv/Lib/site-packages/pip/_vendor/html5lib/_ihatexml.py
Normal file
@@ -0,0 +1,289 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import re
|
||||
import warnings
|
||||
|
||||
from .constants import DataLossWarning
|
||||
|
||||
baseChar = """
|
||||
[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] |
|
||||
[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] |
|
||||
[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] |
|
||||
[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 |
|
||||
[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] |
|
||||
[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] |
|
||||
[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] |
|
||||
[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] |
|
||||
[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 |
|
||||
[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] |
|
||||
[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] |
|
||||
[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D |
|
||||
[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] |
|
||||
[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] |
|
||||
[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] |
|
||||
[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] |
|
||||
[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] |
|
||||
[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] |
|
||||
[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 |
|
||||
[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] |
|
||||
[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] |
|
||||
[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] |
|
||||
[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] |
|
||||
[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] |
|
||||
[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] |
|
||||
[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] |
|
||||
[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] |
|
||||
[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] |
|
||||
[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] |
|
||||
[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A |
|
||||
#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 |
|
||||
#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] |
|
||||
#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] |
|
||||
[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] |
|
||||
[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C |
|
||||
#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 |
|
||||
[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] |
|
||||
[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] |
|
||||
[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 |
|
||||
[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] |
|
||||
[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B |
|
||||
#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE |
|
||||
[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] |
|
||||
[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 |
|
||||
[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] |
|
||||
[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
|
||||
|
||||
ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""
|
||||
|
||||
combiningCharacter = """
|
||||
[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] |
|
||||
[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 |
|
||||
[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] |
|
||||
[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] |
|
||||
#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] |
|
||||
[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] |
|
||||
[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 |
|
||||
#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] |
|
||||
[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC |
|
||||
[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] |
|
||||
#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] |
|
||||
[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] |
|
||||
[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] |
|
||||
[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] |
|
||||
[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] |
|
||||
[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] |
|
||||
#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 |
|
||||
[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] |
|
||||
#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] |
|
||||
[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] |
|
||||
[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] |
|
||||
#x3099 | #x309A"""
|
||||
|
||||
digit = """
|
||||
[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] |
|
||||
[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] |
|
||||
[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] |
|
||||
[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
|
||||
|
||||
extender = """
|
||||
#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 |
|
||||
#[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
|
||||
|
||||
letter = " | ".join([baseChar, ideographic])
|
||||
|
||||
# Without the
|
||||
name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
|
||||
extender])
|
||||
nameFirst = " | ".join([letter, "_"])
|
||||
|
||||
reChar = re.compile(r"#x([\d|A-F]{4,4})")
|
||||
reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")
|
||||
|
||||
|
||||
def charStringToList(chars):
|
||||
charRanges = [item.strip() for item in chars.split(" | ")]
|
||||
rv = []
|
||||
for item in charRanges:
|
||||
foundMatch = False
|
||||
for regexp in (reChar, reCharRange):
|
||||
match = regexp.match(item)
|
||||
if match is not None:
|
||||
rv.append([hexToInt(item) for item in match.groups()])
|
||||
if len(rv[-1]) == 1:
|
||||
rv[-1] = rv[-1] * 2
|
||||
foundMatch = True
|
||||
break
|
||||
if not foundMatch:
|
||||
assert len(item) == 1
|
||||
|
||||
rv.append([ord(item)] * 2)
|
||||
rv = normaliseCharList(rv)
|
||||
return rv
|
||||
|
||||
|
||||
def normaliseCharList(charList):
|
||||
charList = sorted(charList)
|
||||
for item in charList:
|
||||
assert item[1] >= item[0]
|
||||
rv = []
|
||||
i = 0
|
||||
while i < len(charList):
|
||||
j = 1
|
||||
rv.append(charList[i])
|
||||
while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1:
|
||||
rv[-1][1] = charList[i + j][1]
|
||||
j += 1
|
||||
i += j
|
||||
return rv
|
||||
|
||||
|
||||
# We don't really support characters above the BMP :(
|
||||
max_unicode = int("FFFF", 16)
|
||||
|
||||
|
||||
def missingRanges(charList):
|
||||
rv = []
|
||||
if charList[0] != 0:
|
||||
rv.append([0, charList[0][0] - 1])
|
||||
for i, item in enumerate(charList[:-1]):
|
||||
rv.append([item[1] + 1, charList[i + 1][0] - 1])
|
||||
if charList[-1][1] != max_unicode:
|
||||
rv.append([charList[-1][1] + 1, max_unicode])
|
||||
return rv
|
||||
|
||||
|
||||
def listToRegexpStr(charList):
|
||||
rv = []
|
||||
for item in charList:
|
||||
if item[0] == item[1]:
|
||||
rv.append(escapeRegexp(chr(item[0])))
|
||||
else:
|
||||
rv.append(escapeRegexp(chr(item[0])) + "-" +
|
||||
escapeRegexp(chr(item[1])))
|
||||
return "[%s]" % "".join(rv)
|
||||
|
||||
|
||||
def hexToInt(hex_str):
|
||||
return int(hex_str, 16)
|
||||
|
||||
|
||||
def escapeRegexp(string):
|
||||
specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}",
|
||||
"[", "]", "|", "(", ")", "-")
|
||||
for char in specialCharacters:
|
||||
string = string.replace(char, "\\" + char)
|
||||
|
||||
return string
|
||||
|
||||
# output from the above
|
||||
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
|
||||
|
||||
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
|
||||
|
||||
# Simpler things
|
||||
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")
|
||||
|
||||
|
||||
class InfosetFilter(object):
|
||||
replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
|
||||
|
||||
def __init__(self,
|
||||
dropXmlnsLocalName=False,
|
||||
dropXmlnsAttrNs=False,
|
||||
preventDoubleDashComments=False,
|
||||
preventDashAtCommentEnd=False,
|
||||
replaceFormFeedCharacters=True,
|
||||
preventSingleQuotePubid=False):
|
||||
|
||||
self.dropXmlnsLocalName = dropXmlnsLocalName
|
||||
self.dropXmlnsAttrNs = dropXmlnsAttrNs
|
||||
|
||||
self.preventDoubleDashComments = preventDoubleDashComments
|
||||
self.preventDashAtCommentEnd = preventDashAtCommentEnd
|
||||
|
||||
self.replaceFormFeedCharacters = replaceFormFeedCharacters
|
||||
|
||||
self.preventSingleQuotePubid = preventSingleQuotePubid
|
||||
|
||||
self.replaceCache = {}
|
||||
|
||||
def coerceAttribute(self, name, namespace=None):
|
||||
if self.dropXmlnsLocalName and name.startswith("xmlns:"):
|
||||
warnings.warn("Attributes cannot begin with xmlns", DataLossWarning)
|
||||
return None
|
||||
elif (self.dropXmlnsAttrNs and
|
||||
namespace == "http://www.w3.org/2000/xmlns/"):
|
||||
warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning)
|
||||
return None
|
||||
else:
|
||||
return self.toXmlName(name)
|
||||
|
||||
def coerceElement(self, name):
|
||||
return self.toXmlName(name)
|
||||
|
||||
def coerceComment(self, data):
|
||||
if self.preventDoubleDashComments:
|
||||
while "--" in data:
|
||||
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
|
||||
data = data.replace("--", "- -")
|
||||
if data.endswith("-"):
|
||||
warnings.warn("Comments cannot end in a dash", DataLossWarning)
|
||||
data += " "
|
||||
return data
|
||||
|
||||
def coerceCharacters(self, data):
|
||||
if self.replaceFormFeedCharacters:
|
||||
for _ in range(data.count("\x0C")):
|
||||
warnings.warn("Text cannot contain U+000C", DataLossWarning)
|
||||
data = data.replace("\x0C", " ")
|
||||
# Other non-xml characters
|
||||
return data
|
||||
|
||||
def coercePubid(self, data):
|
||||
dataOutput = data
|
||||
for char in nonPubidCharRegexp.findall(data):
|
||||
warnings.warn("Coercing non-XML pubid", DataLossWarning)
|
||||
replacement = self.getReplacementCharacter(char)
|
||||
dataOutput = dataOutput.replace(char, replacement)
|
||||
if self.preventSingleQuotePubid and dataOutput.find("'") >= 0:
|
||||
warnings.warn("Pubid cannot contain single quote", DataLossWarning)
|
||||
dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'"))
|
||||
return dataOutput
|
||||
|
||||
def toXmlName(self, name):
|
||||
nameFirst = name[0]
|
||||
nameRest = name[1:]
|
||||
m = nonXmlNameFirstBMPRegexp.match(nameFirst)
|
||||
if m:
|
||||
warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
|
||||
nameFirstOutput = self.getReplacementCharacter(nameFirst)
|
||||
else:
|
||||
nameFirstOutput = nameFirst
|
||||
|
||||
nameRestOutput = nameRest
|
||||
replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
|
||||
for char in replaceChars:
|
||||
warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
|
||||
replacement = self.getReplacementCharacter(char)
|
||||
nameRestOutput = nameRestOutput.replace(char, replacement)
|
||||
return nameFirstOutput + nameRestOutput
|
||||
|
||||
def getReplacementCharacter(self, char):
|
||||
if char in self.replaceCache:
|
||||
replacement = self.replaceCache[char]
|
||||
else:
|
||||
replacement = self.escapeChar(char)
|
||||
return replacement
|
||||
|
||||
def fromXmlName(self, name):
|
||||
for item in set(self.replacementRegexp.findall(name)):
|
||||
name = name.replace(item, self.unescapeChar(item))
|
||||
return name
|
||||
|
||||
def escapeChar(self, char):
|
||||
replacement = "U%05X" % ord(char)
|
||||
self.replaceCache[char] = replacement
|
||||
return replacement
|
||||
|
||||
def unescapeChar(self, charcode):
|
||||
return chr(int(charcode[1:], 16))
|
||||
918
venv/Lib/site-packages/pip/_vendor/html5lib/_inputstream.py
Normal file
918
venv/Lib/site-packages/pip/_vendor/html5lib/_inputstream.py
Normal file
@@ -0,0 +1,918 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from pip._vendor.six import text_type
|
||||
from pip._vendor.six.moves import http_client, urllib
|
||||
|
||||
import codecs
|
||||
import re
|
||||
from io import BytesIO, StringIO
|
||||
|
||||
from pip._vendor import webencodings
|
||||
|
||||
from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
|
||||
from .constants import _ReparseException
|
||||
from . import _utils
|
||||
|
||||
# Non-unicode versions of constants for use in the pre-parser
|
||||
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
|
||||
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
|
||||
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
|
||||
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
|
||||
|
||||
|
||||
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa
|
||||
|
||||
if _utils.supports_lone_surrogates:
|
||||
# Use one extra step of indirection and create surrogates with
|
||||
# eval. Not using this indirection would introduce an illegal
|
||||
# unicode literal on platforms not supporting such lone
|
||||
# surrogates.
|
||||
assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
|
||||
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
|
||||
eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used
|
||||
"]")
|
||||
else:
|
||||
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
|
||||
|
||||
non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
||||
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
|
||||
0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
|
||||
0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
|
||||
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
|
||||
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
|
||||
0x10FFFE, 0x10FFFF}
|
||||
|
||||
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
|
||||
|
||||
# Cache for charsUntil()
|
||||
charsUntilRegEx = {}
|
||||
|
||||
|
||||
class BufferedStream(object):
|
||||
"""Buffering for streams that do not have buffering of their own
|
||||
|
||||
The buffer is implemented as a list of chunks on the assumption that
|
||||
joining many strings will be slow since it is O(n**2)
|
||||
"""
|
||||
|
||||
def __init__(self, stream):
|
||||
self.stream = stream
|
||||
self.buffer = []
|
||||
self.position = [-1, 0] # chunk number, offset
|
||||
|
||||
def tell(self):
|
||||
pos = 0
|
||||
for chunk in self.buffer[:self.position[0]]:
|
||||
pos += len(chunk)
|
||||
pos += self.position[1]
|
||||
return pos
|
||||
|
||||
def seek(self, pos):
|
||||
assert pos <= self._bufferedBytes()
|
||||
offset = pos
|
||||
i = 0
|
||||
while len(self.buffer[i]) < offset:
|
||||
offset -= len(self.buffer[i])
|
||||
i += 1
|
||||
self.position = [i, offset]
|
||||
|
||||
def read(self, bytes):
|
||||
if not self.buffer:
|
||||
return self._readStream(bytes)
|
||||
elif (self.position[0] == len(self.buffer) and
|
||||
self.position[1] == len(self.buffer[-1])):
|
||||
return self._readStream(bytes)
|
||||
else:
|
||||
return self._readFromBuffer(bytes)
|
||||
|
||||
def _bufferedBytes(self):
|
||||
return sum([len(item) for item in self.buffer])
|
||||
|
||||
def _readStream(self, bytes):
|
||||
data = self.stream.read(bytes)
|
||||
self.buffer.append(data)
|
||||
self.position[0] += 1
|
||||
self.position[1] = len(data)
|
||||
return data
|
||||
|
||||
def _readFromBuffer(self, bytes):
|
||||
remainingBytes = bytes
|
||||
rv = []
|
||||
bufferIndex = self.position[0]
|
||||
bufferOffset = self.position[1]
|
||||
while bufferIndex < len(self.buffer) and remainingBytes != 0:
|
||||
assert remainingBytes > 0
|
||||
bufferedData = self.buffer[bufferIndex]
|
||||
|
||||
if remainingBytes <= len(bufferedData) - bufferOffset:
|
||||
bytesToRead = remainingBytes
|
||||
self.position = [bufferIndex, bufferOffset + bytesToRead]
|
||||
else:
|
||||
bytesToRead = len(bufferedData) - bufferOffset
|
||||
self.position = [bufferIndex, len(bufferedData)]
|
||||
bufferIndex += 1
|
||||
rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead])
|
||||
remainingBytes -= bytesToRead
|
||||
|
||||
bufferOffset = 0
|
||||
|
||||
if remainingBytes:
|
||||
rv.append(self._readStream(remainingBytes))
|
||||
|
||||
return b"".join(rv)
|
||||
|
||||
|
||||
def HTMLInputStream(source, **kwargs):
|
||||
# Work around Python bug #20007: read(0) closes the connection.
|
||||
# http://bugs.python.org/issue20007
|
||||
if (isinstance(source, http_client.HTTPResponse) or
|
||||
# Also check for addinfourl wrapping HTTPResponse
|
||||
(isinstance(source, urllib.response.addbase) and
|
||||
isinstance(source.fp, http_client.HTTPResponse))):
|
||||
isUnicode = False
|
||||
elif hasattr(source, "read"):
|
||||
isUnicode = isinstance(source.read(0), text_type)
|
||||
else:
|
||||
isUnicode = isinstance(source, text_type)
|
||||
|
||||
if isUnicode:
|
||||
encodings = [x for x in kwargs if x.endswith("_encoding")]
|
||||
if encodings:
|
||||
raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)
|
||||
|
||||
return HTMLUnicodeInputStream(source, **kwargs)
|
||||
else:
|
||||
return HTMLBinaryInputStream(source, **kwargs)
|
||||
|
||||
|
||||
class HTMLUnicodeInputStream(object):
|
||||
"""Provides a unicode stream of characters to the HTMLTokenizer.
|
||||
|
||||
This class takes care of character encoding and removing or replacing
|
||||
incorrect byte-sequences and also provides column and line tracking.
|
||||
|
||||
"""
|
||||
|
||||
_defaultChunkSize = 10240
|
||||
|
||||
def __init__(self, source):
|
||||
"""Initialises the HTMLInputStream.
|
||||
|
||||
HTMLInputStream(source, [encoding]) -> Normalized stream from source
|
||||
for use by html5lib.
|
||||
|
||||
source can be either a file-object, local filename or a string.
|
||||
|
||||
The optional encoding parameter must be a string that indicates
|
||||
the encoding. If specified, that encoding will be used,
|
||||
regardless of any BOM or later declaration (such as in a meta
|
||||
element)
|
||||
|
||||
"""
|
||||
|
||||
if not _utils.supports_lone_surrogates:
|
||||
# Such platforms will have already checked for such
|
||||
# surrogate errors, so no need to do this checking.
|
||||
self.reportCharacterErrors = None
|
||||
elif len("\U0010FFFF") == 1:
|
||||
self.reportCharacterErrors = self.characterErrorsUCS4
|
||||
else:
|
||||
self.reportCharacterErrors = self.characterErrorsUCS2
|
||||
|
||||
# List of where new lines occur
|
||||
self.newLines = [0]
|
||||
|
||||
self.charEncoding = (lookupEncoding("utf-8"), "certain")
|
||||
self.dataStream = self.openStream(source)
|
||||
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.chunk = ""
|
||||
self.chunkSize = 0
|
||||
self.chunkOffset = 0
|
||||
self.errors = []
|
||||
|
||||
# number of (complete) lines in previous chunks
|
||||
self.prevNumLines = 0
|
||||
# number of columns in the last line of the previous chunk
|
||||
self.prevNumCols = 0
|
||||
|
||||
# Deal with CR LF and surrogates split over chunk boundaries
|
||||
self._bufferedCharacter = None
|
||||
|
||||
def openStream(self, source):
|
||||
"""Produces a file object from source.
|
||||
|
||||
source can be either a file object, local filename or a string.
|
||||
|
||||
"""
|
||||
# Already a file object
|
||||
if hasattr(source, 'read'):
|
||||
stream = source
|
||||
else:
|
||||
stream = StringIO(source)
|
||||
|
||||
return stream
|
||||
|
||||
def _position(self, offset):
|
||||
chunk = self.chunk
|
||||
nLines = chunk.count('\n', 0, offset)
|
||||
positionLine = self.prevNumLines + nLines
|
||||
lastLinePos = chunk.rfind('\n', 0, offset)
|
||||
if lastLinePos == -1:
|
||||
positionColumn = self.prevNumCols + offset
|
||||
else:
|
||||
positionColumn = offset - (lastLinePos + 1)
|
||||
return (positionLine, positionColumn)
|
||||
|
||||
def position(self):
|
||||
"""Returns (line, col) of the current position in the stream."""
|
||||
line, col = self._position(self.chunkOffset)
|
||||
return (line + 1, col)
|
||||
|
||||
def char(self):
|
||||
""" Read one character from the stream or queue if available. Return
|
||||
EOF when EOF is reached.
|
||||
"""
|
||||
# Read a new chunk from the input stream if necessary
|
||||
if self.chunkOffset >= self.chunkSize:
|
||||
if not self.readChunk():
|
||||
return EOF
|
||||
|
||||
chunkOffset = self.chunkOffset
|
||||
char = self.chunk[chunkOffset]
|
||||
self.chunkOffset = chunkOffset + 1
|
||||
|
||||
return char
|
||||
|
||||
def readChunk(self, chunkSize=None):
|
||||
if chunkSize is None:
|
||||
chunkSize = self._defaultChunkSize
|
||||
|
||||
self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)
|
||||
|
||||
self.chunk = ""
|
||||
self.chunkSize = 0
|
||||
self.chunkOffset = 0
|
||||
|
||||
data = self.dataStream.read(chunkSize)
|
||||
|
||||
# Deal with CR LF and surrogates broken across chunks
|
||||
if self._bufferedCharacter:
|
||||
data = self._bufferedCharacter + data
|
||||
self._bufferedCharacter = None
|
||||
elif not data:
|
||||
# We have no more data, bye-bye stream
|
||||
return False
|
||||
|
||||
if len(data) > 1:
|
||||
lastv = ord(data[-1])
|
||||
if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF:
|
||||
self._bufferedCharacter = data[-1]
|
||||
data = data[:-1]
|
||||
|
||||
if self.reportCharacterErrors:
|
||||
self.reportCharacterErrors(data)
|
||||
|
||||
# Replace invalid characters
|
||||
data = data.replace("\r\n", "\n")
|
||||
data = data.replace("\r", "\n")
|
||||
|
||||
self.chunk = data
|
||||
self.chunkSize = len(data)
|
||||
|
||||
return True
|
||||
|
||||
def characterErrorsUCS4(self, data):
|
||||
for _ in range(len(invalid_unicode_re.findall(data))):
|
||||
self.errors.append("invalid-codepoint")
|
||||
|
||||
def characterErrorsUCS2(self, data):
|
||||
# Someone picked the wrong compile option
|
||||
# You lose
|
||||
skip = False
|
||||
for match in invalid_unicode_re.finditer(data):
|
||||
if skip:
|
||||
continue
|
||||
codepoint = ord(match.group())
|
||||
pos = match.start()
|
||||
# Pretty sure there should be endianness issues here
|
||||
if _utils.isSurrogatePair(data[pos:pos + 2]):
|
||||
# We have a surrogate pair!
|
||||
char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
|
||||
if char_val in non_bmp_invalid_codepoints:
|
||||
self.errors.append("invalid-codepoint")
|
||||
skip = True
|
||||
elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and
|
||||
pos == len(data) - 1):
|
||||
self.errors.append("invalid-codepoint")
|
||||
else:
|
||||
skip = False
|
||||
self.errors.append("invalid-codepoint")
|
||||
|
||||
def charsUntil(self, characters, opposite=False):
|
||||
""" Returns a string of characters from the stream up to but not
|
||||
including any character in 'characters' or EOF. 'characters' must be
|
||||
a container that supports the 'in' method and iteration over its
|
||||
characters.
|
||||
"""
|
||||
|
||||
# Use a cache of regexps to find the required characters
|
||||
try:
|
||||
chars = charsUntilRegEx[(characters, opposite)]
|
||||
except KeyError:
|
||||
if __debug__:
|
||||
for c in characters:
|
||||
assert(ord(c) < 128)
|
||||
regex = "".join(["\\x%02x" % ord(c) for c in characters])
|
||||
if not opposite:
|
||||
regex = "^%s" % regex
|
||||
chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)
|
||||
|
||||
rv = []
|
||||
|
||||
while True:
|
||||
# Find the longest matching prefix
|
||||
m = chars.match(self.chunk, self.chunkOffset)
|
||||
if m is None:
|
||||
# If nothing matched, and it wasn't because we ran out of chunk,
|
||||
# then stop
|
||||
if self.chunkOffset != self.chunkSize:
|
||||
break
|
||||
else:
|
||||
end = m.end()
|
||||
# If not the whole chunk matched, return everything
|
||||
# up to the part that didn't match
|
||||
if end != self.chunkSize:
|
||||
rv.append(self.chunk[self.chunkOffset:end])
|
||||
self.chunkOffset = end
|
||||
break
|
||||
# If the whole remainder of the chunk matched,
|
||||
# use it all and read the next chunk
|
||||
rv.append(self.chunk[self.chunkOffset:])
|
||||
if not self.readChunk():
|
||||
# Reached EOF
|
||||
break
|
||||
|
||||
r = "".join(rv)
|
||||
return r
|
||||
|
||||
def unget(self, char):
|
||||
# Only one character is allowed to be ungotten at once - it must
|
||||
# be consumed again before any further call to unget
|
||||
if char is not EOF:
|
||||
if self.chunkOffset == 0:
|
||||
# unget is called quite rarely, so it's a good idea to do
|
||||
# more work here if it saves a bit of work in the frequently
|
||||
# called char and charsUntil.
|
||||
# So, just prepend the ungotten character onto the current
|
||||
# chunk:
|
||||
self.chunk = char + self.chunk
|
||||
self.chunkSize += 1
|
||||
else:
|
||||
self.chunkOffset -= 1
|
||||
assert self.chunk[self.chunkOffset] == char
|
||||
|
||||
|
||||
class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
"""Provides a unicode stream of characters to the HTMLTokenizer.
|
||||
|
||||
This class takes care of character encoding and removing or replacing
|
||||
incorrect byte-sequences and also provides column and line tracking.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, source, override_encoding=None, transport_encoding=None,
|
||||
same_origin_parent_encoding=None, likely_encoding=None,
|
||||
default_encoding="windows-1252", useChardet=True):
|
||||
"""Initialises the HTMLInputStream.
|
||||
|
||||
HTMLInputStream(source, [encoding]) -> Normalized stream from source
|
||||
for use by html5lib.
|
||||
|
||||
source can be either a file-object, local filename or a string.
|
||||
|
||||
The optional encoding parameter must be a string that indicates
|
||||
the encoding. If specified, that encoding will be used,
|
||||
regardless of any BOM or later declaration (such as in a meta
|
||||
element)
|
||||
|
||||
"""
|
||||
# Raw Stream - for unicode objects this will encode to utf-8 and set
|
||||
# self.charEncoding as appropriate
|
||||
self.rawStream = self.openStream(source)
|
||||
|
||||
HTMLUnicodeInputStream.__init__(self, self.rawStream)
|
||||
|
||||
# Encoding Information
|
||||
# Number of bytes to use when looking for a meta element with
|
||||
# encoding information
|
||||
self.numBytesMeta = 1024
|
||||
# Number of bytes to use when using detecting encoding using chardet
|
||||
self.numBytesChardet = 100
|
||||
# Things from args
|
||||
self.override_encoding = override_encoding
|
||||
self.transport_encoding = transport_encoding
|
||||
self.same_origin_parent_encoding = same_origin_parent_encoding
|
||||
self.likely_encoding = likely_encoding
|
||||
self.default_encoding = default_encoding
|
||||
|
||||
# Determine encoding
|
||||
self.charEncoding = self.determineEncoding(useChardet)
|
||||
assert self.charEncoding[0] is not None
|
||||
|
||||
# Call superclass
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
|
||||
HTMLUnicodeInputStream.reset(self)
|
||||
|
||||
def openStream(self, source):
|
||||
"""Produces a file object from source.
|
||||
|
||||
source can be either a file object, local filename or a string.
|
||||
|
||||
"""
|
||||
# Already a file object
|
||||
if hasattr(source, 'read'):
|
||||
stream = source
|
||||
else:
|
||||
stream = BytesIO(source)
|
||||
|
||||
try:
|
||||
stream.seek(stream.tell())
|
||||
except Exception:
|
||||
stream = BufferedStream(stream)
|
||||
|
||||
return stream
|
||||
|
||||
def determineEncoding(self, chardet=True):
|
||||
# BOMs take precedence over everything
|
||||
# This will also read past the BOM if present
|
||||
charEncoding = self.detectBOM(), "certain"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# If we've been overridden, we've been overridden
|
||||
charEncoding = lookupEncoding(self.override_encoding), "certain"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Now check the transport layer
|
||||
charEncoding = lookupEncoding(self.transport_encoding), "certain"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Look for meta elements with encoding information
|
||||
charEncoding = self.detectEncodingMeta(), "tentative"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Parent document encoding
|
||||
charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
|
||||
if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
|
||||
return charEncoding
|
||||
|
||||
# "likely" encoding
|
||||
charEncoding = lookupEncoding(self.likely_encoding), "tentative"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Guess with chardet, if available
|
||||
if chardet:
|
||||
try:
|
||||
from pip._vendor.chardet.universaldetector import UniversalDetector
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
buffers = []
|
||||
detector = UniversalDetector()
|
||||
while not detector.done:
|
||||
buffer = self.rawStream.read(self.numBytesChardet)
|
||||
assert isinstance(buffer, bytes)
|
||||
if not buffer:
|
||||
break
|
||||
buffers.append(buffer)
|
||||
detector.feed(buffer)
|
||||
detector.close()
|
||||
encoding = lookupEncoding(detector.result['encoding'])
|
||||
self.rawStream.seek(0)
|
||||
if encoding is not None:
|
||||
return encoding, "tentative"
|
||||
|
||||
# Try the default encoding
|
||||
charEncoding = lookupEncoding(self.default_encoding), "tentative"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# Fallback to html5lib's default if even that hasn't worked
|
||||
return lookupEncoding("windows-1252"), "tentative"
|
||||
|
||||
def changeEncoding(self, newEncoding):
|
||||
assert self.charEncoding[1] != "certain"
|
||||
newEncoding = lookupEncoding(newEncoding)
|
||||
if newEncoding is None:
|
||||
return
|
||||
if newEncoding.name in ("utf-16be", "utf-16le"):
|
||||
newEncoding = lookupEncoding("utf-8")
|
||||
assert newEncoding is not None
|
||||
elif newEncoding == self.charEncoding[0]:
|
||||
self.charEncoding = (self.charEncoding[0], "certain")
|
||||
else:
|
||||
self.rawStream.seek(0)
|
||||
self.charEncoding = (newEncoding, "certain")
|
||||
self.reset()
|
||||
raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
|
||||
|
||||
def detectBOM(self):
|
||||
"""Attempts to detect at BOM at the start of the stream. If
|
||||
an encoding can be determined from the BOM return the name of the
|
||||
encoding otherwise return None"""
|
||||
bomDict = {
|
||||
codecs.BOM_UTF8: 'utf-8',
|
||||
codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
|
||||
codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
|
||||
}
|
||||
|
||||
# Go to beginning of file and read in 4 bytes
|
||||
string = self.rawStream.read(4)
|
||||
assert isinstance(string, bytes)
|
||||
|
||||
# Try detecting the BOM using bytes from the string
|
||||
encoding = bomDict.get(string[:3]) # UTF-8
|
||||
seek = 3
|
||||
if not encoding:
|
||||
# Need to detect UTF-32 before UTF-16
|
||||
encoding = bomDict.get(string) # UTF-32
|
||||
seek = 4
|
||||
if not encoding:
|
||||
encoding = bomDict.get(string[:2]) # UTF-16
|
||||
seek = 2
|
||||
|
||||
# Set the read position past the BOM if one was found, otherwise
|
||||
# set it to the start of the stream
|
||||
if encoding:
|
||||
self.rawStream.seek(seek)
|
||||
return lookupEncoding(encoding)
|
||||
else:
|
||||
self.rawStream.seek(0)
|
||||
return None
|
||||
|
||||
def detectEncodingMeta(self):
|
||||
"""Report the encoding declared by the meta element
|
||||
"""
|
||||
buffer = self.rawStream.read(self.numBytesMeta)
|
||||
assert isinstance(buffer, bytes)
|
||||
parser = EncodingParser(buffer)
|
||||
self.rawStream.seek(0)
|
||||
encoding = parser.getEncoding()
|
||||
|
||||
if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
|
||||
encoding = lookupEncoding("utf-8")
|
||||
|
||||
return encoding
|
||||
|
||||
|
||||
class EncodingBytes(bytes):
|
||||
"""String-like object with an associated position and various extra methods
|
||||
If the position is ever greater than the string length then an exception is
|
||||
raised"""
|
||||
def __new__(self, value):
|
||||
assert isinstance(value, bytes)
|
||||
return bytes.__new__(self, value.lower())
|
||||
|
||||
def __init__(self, value):
|
||||
# pylint:disable=unused-argument
|
||||
self._position = -1
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
p = self._position = self._position + 1
|
||||
if p >= len(self):
|
||||
raise StopIteration
|
||||
elif p < 0:
|
||||
raise TypeError
|
||||
return self[p:p + 1]
|
||||
|
||||
def next(self):
|
||||
# Py2 compat
|
||||
return self.__next__()
|
||||
|
||||
def previous(self):
|
||||
p = self._position
|
||||
if p >= len(self):
|
||||
raise StopIteration
|
||||
elif p < 0:
|
||||
raise TypeError
|
||||
self._position = p = p - 1
|
||||
return self[p:p + 1]
|
||||
|
||||
def setPosition(self, position):
|
||||
if self._position >= len(self):
|
||||
raise StopIteration
|
||||
self._position = position
|
||||
|
||||
def getPosition(self):
|
||||
if self._position >= len(self):
|
||||
raise StopIteration
|
||||
if self._position >= 0:
|
||||
return self._position
|
||||
else:
|
||||
return None
|
||||
|
||||
position = property(getPosition, setPosition)
|
||||
|
||||
def getCurrentByte(self):
|
||||
return self[self.position:self.position + 1]
|
||||
|
||||
currentByte = property(getCurrentByte)
|
||||
|
||||
def skip(self, chars=spaceCharactersBytes):
|
||||
"""Skip past a list of characters"""
|
||||
p = self.position # use property for the error-checking
|
||||
while p < len(self):
|
||||
c = self[p:p + 1]
|
||||
if c not in chars:
|
||||
self._position = p
|
||||
return c
|
||||
p += 1
|
||||
self._position = p
|
||||
return None
|
||||
|
||||
def skipUntil(self, chars):
|
||||
p = self.position
|
||||
while p < len(self):
|
||||
c = self[p:p + 1]
|
||||
if c in chars:
|
||||
self._position = p
|
||||
return c
|
||||
p += 1
|
||||
self._position = p
|
||||
return None
|
||||
|
||||
def matchBytes(self, bytes):
|
||||
"""Look for a sequence of bytes at the start of a string. If the bytes
|
||||
are found return True and advance the position to the byte after the
|
||||
match. Otherwise return False and leave the position alone"""
|
||||
rv = self.startswith(bytes, self.position)
|
||||
if rv:
|
||||
self.position += len(bytes)
|
||||
return rv
|
||||
|
||||
def jumpTo(self, bytes):
|
||||
"""Look for the next sequence of bytes matching a given sequence. If
|
||||
a match is found advance the position to the last byte of the match"""
|
||||
try:
|
||||
self._position = self.index(bytes, self.position) + len(bytes) - 1
|
||||
except ValueError:
|
||||
raise StopIteration
|
||||
return True
|
||||
|
||||
|
||||
class EncodingParser(object):
|
||||
"""Mini parser for detecting character encoding from meta elements"""
|
||||
|
||||
def __init__(self, data):
|
||||
"""string - the data to work on for encoding detection"""
|
||||
self.data = EncodingBytes(data)
|
||||
self.encoding = None
|
||||
|
||||
def getEncoding(self):
|
||||
if b"<meta" not in self.data:
|
||||
return None
|
||||
|
||||
methodDispatch = (
|
||||
(b"<!--", self.handleComment),
|
||||
(b"<meta", self.handleMeta),
|
||||
(b"</", self.handlePossibleEndTag),
|
||||
(b"<!", self.handleOther),
|
||||
(b"<?", self.handleOther),
|
||||
(b"<", self.handlePossibleStartTag))
|
||||
for _ in self.data:
|
||||
keepParsing = True
|
||||
try:
|
||||
self.data.jumpTo(b"<")
|
||||
except StopIteration:
|
||||
break
|
||||
for key, method in methodDispatch:
|
||||
if self.data.matchBytes(key):
|
||||
try:
|
||||
keepParsing = method()
|
||||
break
|
||||
except StopIteration:
|
||||
keepParsing = False
|
||||
break
|
||||
if not keepParsing:
|
||||
break
|
||||
|
||||
return self.encoding
|
||||
|
||||
def handleComment(self):
|
||||
"""Skip over comments"""
|
||||
return self.data.jumpTo(b"-->")
|
||||
|
||||
def handleMeta(self):
|
||||
if self.data.currentByte not in spaceCharactersBytes:
|
||||
# if we have <meta not followed by a space so just keep going
|
||||
return True
|
||||
# We have a valid meta element we want to search for attributes
|
||||
hasPragma = False
|
||||
pendingEncoding = None
|
||||
while True:
|
||||
# Try to find the next attribute after the current position
|
||||
attr = self.getAttribute()
|
||||
if attr is None:
|
||||
return True
|
||||
else:
|
||||
if attr[0] == b"http-equiv":
|
||||
hasPragma = attr[1] == b"content-type"
|
||||
if hasPragma and pendingEncoding is not None:
|
||||
self.encoding = pendingEncoding
|
||||
return False
|
||||
elif attr[0] == b"charset":
|
||||
tentativeEncoding = attr[1]
|
||||
codec = lookupEncoding(tentativeEncoding)
|
||||
if codec is not None:
|
||||
self.encoding = codec
|
||||
return False
|
||||
elif attr[0] == b"content":
|
||||
contentParser = ContentAttrParser(EncodingBytes(attr[1]))
|
||||
tentativeEncoding = contentParser.parse()
|
||||
if tentativeEncoding is not None:
|
||||
codec = lookupEncoding(tentativeEncoding)
|
||||
if codec is not None:
|
||||
if hasPragma:
|
||||
self.encoding = codec
|
||||
return False
|
||||
else:
|
||||
pendingEncoding = codec
|
||||
|
||||
def handlePossibleStartTag(self):
|
||||
return self.handlePossibleTag(False)
|
||||
|
||||
def handlePossibleEndTag(self):
|
||||
next(self.data)
|
||||
return self.handlePossibleTag(True)
|
||||
|
||||
def handlePossibleTag(self, endTag):
|
||||
data = self.data
|
||||
if data.currentByte not in asciiLettersBytes:
|
||||
# If the next byte is not an ascii letter either ignore this
|
||||
# fragment (possible start tag case) or treat it according to
|
||||
# handleOther
|
||||
if endTag:
|
||||
data.previous()
|
||||
self.handleOther()
|
||||
return True
|
||||
|
||||
c = data.skipUntil(spacesAngleBrackets)
|
||||
if c == b"<":
|
||||
# return to the first step in the overall "two step" algorithm
|
||||
# reprocessing the < byte
|
||||
data.previous()
|
||||
else:
|
||||
# Read all attributes
|
||||
attr = self.getAttribute()
|
||||
while attr is not None:
|
||||
attr = self.getAttribute()
|
||||
return True
|
||||
|
||||
def handleOther(self):
|
||||
return self.data.jumpTo(b">")
|
||||
|
||||
def getAttribute(self):
|
||||
"""Return a name,value pair for the next attribute in the stream,
|
||||
if one is found, or None"""
|
||||
data = self.data
|
||||
# Step 1 (skip chars)
|
||||
c = data.skip(spaceCharactersBytes | frozenset([b"/"]))
|
||||
assert c is None or len(c) == 1
|
||||
# Step 2
|
||||
if c in (b">", None):
|
||||
return None
|
||||
# Step 3
|
||||
attrName = []
|
||||
attrValue = []
|
||||
# Step 4 attribute name
|
||||
while True:
|
||||
if c == b"=" and attrName:
|
||||
break
|
||||
elif c in spaceCharactersBytes:
|
||||
# Step 6!
|
||||
c = data.skip()
|
||||
break
|
||||
elif c in (b"/", b">"):
|
||||
return b"".join(attrName), b""
|
||||
elif c in asciiUppercaseBytes:
|
||||
attrName.append(c.lower())
|
||||
elif c is None:
|
||||
return None
|
||||
else:
|
||||
attrName.append(c)
|
||||
# Step 5
|
||||
c = next(data)
|
||||
# Step 7
|
||||
if c != b"=":
|
||||
data.previous()
|
||||
return b"".join(attrName), b""
|
||||
# Step 8
|
||||
next(data)
|
||||
# Step 9
|
||||
c = data.skip()
|
||||
# Step 10
|
||||
if c in (b"'", b'"'):
|
||||
# 10.1
|
||||
quoteChar = c
|
||||
while True:
|
||||
# 10.2
|
||||
c = next(data)
|
||||
# 10.3
|
||||
if c == quoteChar:
|
||||
next(data)
|
||||
return b"".join(attrName), b"".join(attrValue)
|
||||
# 10.4
|
||||
elif c in asciiUppercaseBytes:
|
||||
attrValue.append(c.lower())
|
||||
# 10.5
|
||||
else:
|
||||
attrValue.append(c)
|
||||
elif c == b">":
|
||||
return b"".join(attrName), b""
|
||||
elif c in asciiUppercaseBytes:
|
||||
attrValue.append(c.lower())
|
||||
elif c is None:
|
||||
return None
|
||||
else:
|
||||
attrValue.append(c)
|
||||
# Step 11
|
||||
while True:
|
||||
c = next(data)
|
||||
if c in spacesAngleBrackets:
|
||||
return b"".join(attrName), b"".join(attrValue)
|
||||
elif c in asciiUppercaseBytes:
|
||||
attrValue.append(c.lower())
|
||||
elif c is None:
|
||||
return None
|
||||
else:
|
||||
attrValue.append(c)
|
||||
|
||||
|
||||
class ContentAttrParser(object):
|
||||
def __init__(self, data):
|
||||
assert isinstance(data, bytes)
|
||||
self.data = data
|
||||
|
||||
def parse(self):
|
||||
try:
|
||||
# Check if the attr name is charset
|
||||
# otherwise return
|
||||
self.data.jumpTo(b"charset")
|
||||
self.data.position += 1
|
||||
self.data.skip()
|
||||
if not self.data.currentByte == b"=":
|
||||
# If there is no = sign keep looking for attrs
|
||||
return None
|
||||
self.data.position += 1
|
||||
self.data.skip()
|
||||
# Look for an encoding between matching quote marks
|
||||
if self.data.currentByte in (b'"', b"'"):
|
||||
quoteMark = self.data.currentByte
|
||||
self.data.position += 1
|
||||
oldPosition = self.data.position
|
||||
if self.data.jumpTo(quoteMark):
|
||||
return self.data[oldPosition:self.data.position]
|
||||
else:
|
||||
return None
|
||||
else:
|
||||
# Unquoted value
|
||||
oldPosition = self.data.position
|
||||
try:
|
||||
self.data.skipUntil(spaceCharactersBytes)
|
||||
return self.data[oldPosition:self.data.position]
|
||||
except StopIteration:
|
||||
# Return the whole remaining value
|
||||
return self.data[oldPosition:]
|
||||
except StopIteration:
|
||||
return None
|
||||
|
||||
|
||||
def lookupEncoding(encoding):
|
||||
"""Return the python codec name corresponding to an encoding or None if the
|
||||
string doesn't correspond to a valid encoding."""
|
||||
if isinstance(encoding, bytes):
|
||||
try:
|
||||
encoding = encoding.decode("ascii")
|
||||
except UnicodeDecodeError:
|
||||
return None
|
||||
|
||||
if encoding is not None:
|
||||
try:
|
||||
return webencodings.lookup(encoding)
|
||||
except AttributeError:
|
||||
return None
|
||||
else:
|
||||
return None
|
||||
1735
venv/Lib/site-packages/pip/_vendor/html5lib/_tokenizer.py
Normal file
1735
venv/Lib/site-packages/pip/_vendor/html5lib/_tokenizer.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,5 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from .py import Trie
|
||||
|
||||
__all__ = ["Trie"]
|
||||
40
venv/Lib/site-packages/pip/_vendor/html5lib/_trie/_base.py
Normal file
40
venv/Lib/site-packages/pip/_vendor/html5lib/_trie/_base.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
try:
|
||||
from collections.abc import Mapping
|
||||
except ImportError: # Python 2.7
|
||||
from collections import Mapping
|
||||
|
||||
|
||||
class Trie(Mapping):
|
||||
"""Abstract base class for tries"""
|
||||
|
||||
def keys(self, prefix=None):
|
||||
# pylint:disable=arguments-differ
|
||||
keys = super(Trie, self).keys()
|
||||
|
||||
if prefix is None:
|
||||
return set(keys)
|
||||
|
||||
return {x for x in keys if x.startswith(prefix)}
|
||||
|
||||
def has_keys_with_prefix(self, prefix):
|
||||
for key in self.keys():
|
||||
if key.startswith(prefix):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def longest_prefix(self, prefix):
|
||||
if prefix in self:
|
||||
return prefix
|
||||
|
||||
for i in range(1, len(prefix) + 1):
|
||||
if prefix[:-i] in self:
|
||||
return prefix[:-i]
|
||||
|
||||
raise KeyError(prefix)
|
||||
|
||||
def longest_prefix_item(self, prefix):
|
||||
lprefix = self.longest_prefix(prefix)
|
||||
return (lprefix, self[lprefix])
|
||||
67
venv/Lib/site-packages/pip/_vendor/html5lib/_trie/py.py
Normal file
67
venv/Lib/site-packages/pip/_vendor/html5lib/_trie/py.py
Normal file
@@ -0,0 +1,67 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
from pip._vendor.six import text_type
|
||||
|
||||
from bisect import bisect_left
|
||||
|
||||
from ._base import Trie as ABCTrie
|
||||
|
||||
|
||||
class Trie(ABCTrie):
|
||||
def __init__(self, data):
|
||||
if not all(isinstance(x, text_type) for x in data.keys()):
|
||||
raise TypeError("All keys must be strings")
|
||||
|
||||
self._data = data
|
||||
self._keys = sorted(data.keys())
|
||||
self._cachestr = ""
|
||||
self._cachepoints = (0, len(data))
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self._data
|
||||
|
||||
def __len__(self):
|
||||
return len(self._data)
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._data)
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self._data[key]
|
||||
|
||||
def keys(self, prefix=None):
|
||||
if prefix is None or prefix == "" or not self._keys:
|
||||
return set(self._keys)
|
||||
|
||||
if prefix.startswith(self._cachestr):
|
||||
lo, hi = self._cachepoints
|
||||
start = i = bisect_left(self._keys, prefix, lo, hi)
|
||||
else:
|
||||
start = i = bisect_left(self._keys, prefix)
|
||||
|
||||
keys = set()
|
||||
if start == len(self._keys):
|
||||
return keys
|
||||
|
||||
while self._keys[i].startswith(prefix):
|
||||
keys.add(self._keys[i])
|
||||
i += 1
|
||||
|
||||
self._cachestr = prefix
|
||||
self._cachepoints = (start, i)
|
||||
|
||||
return keys
|
||||
|
||||
def has_keys_with_prefix(self, prefix):
|
||||
if prefix in self._data:
|
||||
return True
|
||||
|
||||
if prefix.startswith(self._cachestr):
|
||||
lo, hi = self._cachepoints
|
||||
i = bisect_left(self._keys, prefix, lo, hi)
|
||||
else:
|
||||
i = bisect_left(self._keys, prefix)
|
||||
|
||||
if i == len(self._keys):
|
||||
return False
|
||||
|
||||
return self._keys[i].startswith(prefix)
|
||||
159
venv/Lib/site-packages/pip/_vendor/html5lib/_utils.py
Normal file
159
venv/Lib/site-packages/pip/_vendor/html5lib/_utils.py
Normal file
@@ -0,0 +1,159 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from types import ModuleType
|
||||
|
||||
try:
|
||||
from collections.abc import Mapping
|
||||
except ImportError:
|
||||
from collections import Mapping
|
||||
|
||||
from pip._vendor.six import text_type, PY3
|
||||
|
||||
if PY3:
|
||||
import xml.etree.ElementTree as default_etree
|
||||
else:
|
||||
try:
|
||||
import xml.etree.cElementTree as default_etree
|
||||
except ImportError:
|
||||
import xml.etree.ElementTree as default_etree
|
||||
|
||||
|
||||
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
|
||||
"surrogatePairToCodepoint", "moduleFactoryFactory",
|
||||
"supports_lone_surrogates"]
|
||||
|
||||
|
||||
# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
|
||||
# caught by the below test. In general this would be any platform
|
||||
# using UTF-16 as its encoding of unicode strings, such as
|
||||
# Jython. This is because UTF-16 itself is based on the use of such
|
||||
# surrogates, and there is no mechanism to further escape such
|
||||
# escapes.
|
||||
try:
|
||||
_x = eval('"\\uD800"') # pylint:disable=eval-used
|
||||
if not isinstance(_x, text_type):
|
||||
# We need this with u"" because of http://bugs.jython.org/issue2039
|
||||
_x = eval('u"\\uD800"') # pylint:disable=eval-used
|
||||
assert isinstance(_x, text_type)
|
||||
except Exception:
|
||||
supports_lone_surrogates = False
|
||||
else:
|
||||
supports_lone_surrogates = True
|
||||
|
||||
|
||||
class MethodDispatcher(dict):
|
||||
"""Dict with 2 special properties:
|
||||
|
||||
On initiation, keys that are lists, sets or tuples are converted to
|
||||
multiple keys so accessing any one of the items in the original
|
||||
list-like object returns the matching value
|
||||
|
||||
md = MethodDispatcher({("foo", "bar"):"baz"})
|
||||
md["foo"] == "baz"
|
||||
|
||||
A default value which can be set through the default attribute.
|
||||
"""
|
||||
|
||||
def __init__(self, items=()):
|
||||
_dictEntries = []
|
||||
for name, value in items:
|
||||
if isinstance(name, (list, tuple, frozenset, set)):
|
||||
for item in name:
|
||||
_dictEntries.append((item, value))
|
||||
else:
|
||||
_dictEntries.append((name, value))
|
||||
dict.__init__(self, _dictEntries)
|
||||
assert len(self) == len(_dictEntries)
|
||||
self.default = None
|
||||
|
||||
def __getitem__(self, key):
|
||||
return dict.get(self, key, self.default)
|
||||
|
||||
def __get__(self, instance, owner=None):
|
||||
return BoundMethodDispatcher(instance, self)
|
||||
|
||||
|
||||
class BoundMethodDispatcher(Mapping):
|
||||
"""Wraps a MethodDispatcher, binding its return values to `instance`"""
|
||||
def __init__(self, instance, dispatcher):
|
||||
self.instance = instance
|
||||
self.dispatcher = dispatcher
|
||||
|
||||
def __getitem__(self, key):
|
||||
# see https://docs.python.org/3/reference/datamodel.html#object.__get__
|
||||
# on a function, __get__ is used to bind a function to an instance as a bound method
|
||||
return self.dispatcher[key].__get__(self.instance)
|
||||
|
||||
def get(self, key, default):
|
||||
if key in self.dispatcher:
|
||||
return self[key]
|
||||
else:
|
||||
return default
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.dispatcher)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.dispatcher)
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self.dispatcher
|
||||
|
||||
|
||||
# Some utility functions to deal with weirdness around UCS2 vs UCS4
|
||||
# python builds
|
||||
|
||||
def isSurrogatePair(data):
|
||||
return (len(data) == 2 and
|
||||
ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and
|
||||
ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF)
|
||||
|
||||
|
||||
def surrogatePairToCodepoint(data):
|
||||
char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 +
|
||||
(ord(data[1]) - 0xDC00))
|
||||
return char_val
|
||||
|
||||
# Module Factory Factory (no, this isn't Java, I know)
|
||||
# Here to stop this being duplicated all over the place.
|
||||
|
||||
|
||||
def moduleFactoryFactory(factory):
|
||||
moduleCache = {}
|
||||
|
||||
def moduleFactory(baseModule, *args, **kwargs):
|
||||
if isinstance(ModuleType.__name__, type("")):
|
||||
name = "_%s_factory" % baseModule.__name__
|
||||
else:
|
||||
name = b"_%s_factory" % baseModule.__name__
|
||||
|
||||
kwargs_tuple = tuple(kwargs.items())
|
||||
|
||||
try:
|
||||
return moduleCache[name][args][kwargs_tuple]
|
||||
except KeyError:
|
||||
mod = ModuleType(name)
|
||||
objs = factory(baseModule, *args, **kwargs)
|
||||
mod.__dict__.update(objs)
|
||||
if "name" not in moduleCache:
|
||||
moduleCache[name] = {}
|
||||
if "args" not in moduleCache[name]:
|
||||
moduleCache[name][args] = {}
|
||||
if "kwargs" not in moduleCache[name][args]:
|
||||
moduleCache[name][args][kwargs_tuple] = {}
|
||||
moduleCache[name][args][kwargs_tuple] = mod
|
||||
return mod
|
||||
|
||||
return moduleFactory
|
||||
|
||||
|
||||
def memoize(func):
|
||||
cache = {}
|
||||
|
||||
def wrapped(*args, **kwargs):
|
||||
key = (tuple(args), tuple(kwargs.items()))
|
||||
if key not in cache:
|
||||
cache[key] = func(*args, **kwargs)
|
||||
return cache[key]
|
||||
|
||||
return wrapped
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user