Change venv

This commit is contained in:
Ambulance Clerc
2023-05-31 08:31:22 +02:00
parent fb6f579089
commit fdbb52c96f
466 changed files with 25899 additions and 64721 deletions

View File

@@ -1,6 +1,11 @@
"""A lil' TOML parser."""
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
# Licensed to PSF under a Contributor Agreement.
__all__ = ("loads", "load", "TOMLDecodeError")
__version__ = "1.0.3" # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT
__version__ = "2.0.1" # DO NOT EDIT THIS LINE MANUALLY. LET bump2version UTILITY DO IT
from pip._vendor.tomli._parser import TOMLDecodeError, load, loads
from ._parser import TOMLDecodeError, load, loads
# Pretend this exception was created here.
TOMLDecodeError.__module__ = __name__

View File

@@ -1,42 +1,33 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
# Licensed to PSF under a Contributor Agreement.
from __future__ import annotations
from collections.abc import Iterable
import string
from types import MappingProxyType
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
FrozenSet,
Iterable,
Optional,
TextIO,
Tuple,
)
from typing import Any, BinaryIO, NamedTuple
from pip._vendor.tomli._re import (
RE_BIN,
from ._re import (
RE_DATETIME,
RE_HEX,
RE_LOCALTIME,
RE_NUMBER,
RE_OCT,
match_to_datetime,
match_to_localtime,
match_to_number,
)
if TYPE_CHECKING:
from re import Pattern
from ._types import Key, ParseFloat, Pos
ASCII_CTRL = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))
# Neither of these sets include quotation mark or backslash. They are
# currently handled as separate cases in the parser functions.
ILLEGAL_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t")
ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t\n\r")
ILLEGAL_MULTILINE_BASIC_STR_CHARS = ASCII_CTRL - frozenset("\t\n")
ILLEGAL_LITERAL_STR_CHARS = ILLEGAL_BASIC_STR_CHARS
ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ASCII_CTRL - frozenset("\t\n")
ILLEGAL_MULTILINE_LITERAL_STR_CHARS = ILLEGAL_MULTILINE_BASIC_STR_CHARS
ILLEGAL_COMMENT_CHARS = ILLEGAL_BASIC_STR_CHARS
@@ -44,6 +35,7 @@ TOML_WS = frozenset(" \t")
TOML_WS_AND_NEWLINE = TOML_WS | frozenset("\n")
BARE_KEY_CHARS = frozenset(string.ascii_letters + string.digits + "-_")
KEY_INITIAL_CHARS = BARE_KEY_CHARS | frozenset("\"'")
HEXDIGIT_CHARS = frozenset(string.hexdigits)
BASIC_STR_ESCAPE_REPLACEMENTS = MappingProxyType(
{
@@ -57,30 +49,33 @@ BASIC_STR_ESCAPE_REPLACEMENTS = MappingProxyType(
}
)
# Type annotations
ParseFloat = Callable[[str], Any]
Key = Tuple[str, ...]
Pos = int
class TOMLDecodeError(ValueError):
    """An error raised if a document is not valid TOML."""
def load(__fp: BinaryIO, *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Parse TOML from a binary file object.

    The whole file is read, decoded with ``bytes.decode()`` and handed to
    ``loads`` together with ``parse_float``.

    Raises:
        TypeError: if the object returned by ``__fp.read()`` has no
            ``decode`` method, which is what happens when the file was
            opened in text mode.
    """
    b = __fp.read()
    try:
        s = b.decode()
    except AttributeError:
        # A ``str`` has no ``.decode``; report the real cause instead of
        # leaking the AttributeError to the caller.
        raise TypeError(
            "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`"
        ) from None
    return loads(s, parse_float=parse_float)
def loads(s: str, *, parse_float: ParseFloat = float) -> Dict[str, Any]: # noqa: C901
def loads(__s: str, *, parse_float: ParseFloat = float) -> dict[str, Any]: # noqa: C901
"""Parse TOML from a string."""
# The spec allows converting "\r\n" to "\n", even in string
# literals. Let's do so to simplify parsing.
src = s.replace("\r\n", "\n")
src = __s.replace("\r\n", "\n")
pos = 0
state = State()
out = Output(NestedDict(), Flags())
header: Key = ()
parse_float = make_safe_parse_float(parse_float)
# Parse one statement at a time
# (typically means one line in TOML source)
@@ -104,17 +99,18 @@ def loads(s: str, *, parse_float: ParseFloat = float) -> Dict[str, Any]: # noqa
pos += 1
continue
if char in KEY_INITIAL_CHARS:
pos = key_value_rule(src, pos, state, parse_float)
pos = key_value_rule(src, pos, out, header, parse_float)
pos = skip_chars(src, pos, TOML_WS)
elif char == "[":
try:
second_char: Optional[str] = src[pos + 1]
second_char: str | None = src[pos + 1]
except IndexError:
second_char = None
out.flags.finalize_pending()
if second_char == "[":
pos = create_list_rule(src, pos, state)
pos, header = create_list_rule(src, pos, out)
else:
pos = create_dict_rule(src, pos, state)
pos, header = create_dict_rule(src, pos, out)
pos = skip_chars(src, pos, TOML_WS)
elif char != "#":
raise suffixed_err(src, pos, "Invalid statement")
@@ -133,17 +129,7 @@ def loads(s: str, *, parse_float: ParseFloat = float) -> Dict[str, Any]: # noqa
)
pos += 1
return state.out.dict
class State:
    """Bundle of the parser's output container, flags and current header."""

    def __init__(self) -> None:
        # Mutable, read-only
        self.out = NestedDict()
        self.flags = Flags()

        # Immutable, read and write
        self.header_namespace: Key = ()
return out.data.dict
class Flags:
@@ -156,7 +142,16 @@ class Flags:
EXPLICIT_NEST = 1
def __init__(self) -> None:
self._flags: Dict[str, dict] = {}
self._flags: dict[str, dict] = {}
self._pending_flags: set[tuple[Key, int]] = set()
def add_pending(self, key: Key, flag: int) -> None:
    """Queue ``(key, flag)`` in ``self._pending_flags`` without applying it yet."""
    self._pending_flags.add((key, flag))
def finalize_pending(self) -> None:
    """Apply every queued flag non-recursively via ``self.set``, then empty the queue."""
    for key, flag in self._pending_flags:
        self.set(key, flag, recursive=False)
    self._pending_flags.clear()
def unset_all(self, key: Key) -> None:
cont = self._flags
@@ -166,19 +161,6 @@ class Flags:
cont = cont[k]["nested"]
cont.pop(key[-1], None)
def set_for_relative_key(self, head_key: Key, rel_key: Key, flag: int) -> None:
    """Add ``flag`` to every container along ``rel_key`` below ``head_key``.

    Entries for ``head_key`` parts are created (with empty flag sets) when
    missing; each ``rel_key`` part either gets ``flag`` added to its
    existing ``"flags"`` set or is created with ``{flag}``.
    """
    cont = self._flags
    # Walk down to the header's namespace, creating empty entries on the way.
    for k in head_key:
        if k not in cont:
            cont[k] = {"flags": set(), "recursive_flags": set(), "nested": {}}
        cont = cont[k]["nested"]
    # Flag every part of the relative key.
    for k in rel_key:
        if k in cont:
            cont[k]["flags"].add(flag)
        else:
            cont[k] = {"flags": {flag}, "recursive_flags": set(), "nested": {}}
        cont = cont[k]["nested"]
def set(self, key: Key, flag: int, *, recursive: bool) -> None: # noqa: A003
cont = self._flags
key_parent, key_stem = key[:-1], key[-1]
@@ -211,7 +193,7 @@ class Flags:
class NestedDict:
def __init__(self) -> None:
# The parsed content of the TOML document
self.dict: Dict[str, Any] = {}
self.dict: dict[str, Any] = {}
def get_or_create_nest(
self,
@@ -242,6 +224,11 @@ class NestedDict:
cont[last_key] = [{}]
class Output(NamedTuple):
    """Pair of the parsed document container and the flags tracked for it."""

    data: NestedDict
    flags: Flags
def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
try:
while src[pos] in chars:
@@ -256,7 +243,7 @@ def skip_until(
pos: Pos,
expect: str,
*,
error_on: FrozenSet[str],
error_on: frozenset[str],
error_on_eof: bool,
) -> Pos:
try:
@@ -264,19 +251,18 @@ def skip_until(
except ValueError:
new_pos = len(src)
if error_on_eof:
raise suffixed_err(src, new_pos, f'Expected "{expect!r}"')
raise suffixed_err(src, new_pos, f"Expected {expect!r}") from None
bad_chars = error_on.intersection(src[pos:new_pos])
if bad_chars:
bad_char = next(iter(bad_chars))
bad_pos = src.index(bad_char, pos)
raise suffixed_err(src, bad_pos, f'Found invalid character "{bad_char!r}"')
if not error_on.isdisjoint(src[pos:new_pos]):
while src[pos] not in error_on:
pos += 1
raise suffixed_err(src, pos, f"Found invalid character {src[pos]!r}")
return new_pos
def skip_comment(src: str, pos: Pos) -> Pos:
try:
char: Optional[str] = src[pos]
char: str | None = src[pos]
except IndexError:
char = None
if char == "#":
@@ -295,115 +281,116 @@ def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
return pos
def create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a ``[table]`` header starting at ``pos``.

    Returns the position just past the closing ``]`` and the parsed key.
    Raises the error built by ``suffixed_err`` when the key is already
    flagged EXPLICIT_NEST or FROZEN, when creating the nest hits an
    existing value, or when the closing ``]`` is missing.
    """
    pos += 1  # Skip "["
    pos = skip_chars(src, pos, TOML_WS)
    pos, key = parse_key(src, pos)

    if out.flags.is_(key, Flags.EXPLICIT_NEST) or out.flags.is_(key, Flags.FROZEN):
        raise suffixed_err(src, pos, f"Cannot declare {key} twice")
    out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.get_or_create_nest(key)
    except KeyError:
        raise suffixed_err(src, pos, "Cannot overwrite a value") from None

    if not src.startswith("]", pos):
        raise suffixed_err(src, pos, "Expected ']' at the end of a table declaration")
    return pos + 1, key
def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a ``[[array-of-tables]]`` header starting at ``pos``.

    Returns the position just past the closing ``]]`` and the parsed key.
    Raises the error built by ``suffixed_err`` when the key is flagged
    FROZEN, when appending the nest hits an existing value, or when the
    closing ``]]`` is missing.
    """
    pos += 2  # Skip "[["
    pos = skip_chars(src, pos, TOML_WS)
    pos, key = parse_key(src, pos)

    if out.flags.is_(key, Flags.FROZEN):
        raise suffixed_err(src, pos, f"Cannot mutate immutable namespace {key}")
    # Free the namespace now that it points to another empty list item...
    out.flags.unset_all(key)
    # ...but this key precisely is still prohibited from table declaration
    out.flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.append_nest_to_list(key)
    except KeyError:
        raise suffixed_err(src, pos, "Cannot overwrite a value") from None

    if not src.startswith("]]", pos):
        raise suffixed_err(src, pos, "Expected ']]' at the end of an array declaration")
    return pos + 2, key
def key_value_rule(
    src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
) -> Pos:
    """Parse one ``key = value`` statement and store it under ``header``.

    Returns the position after the parsed value. Raises the error built by
    ``suffixed_err`` when a dotted-key container is already flagged
    EXPLICIT_NEST, when the parent namespace is FROZEN, or when the key
    would overwrite an existing value.
    """
    pos, key, value = parse_key_value_pair(src, pos, parse_float)
    key_parent, key_stem = key[:-1], key[-1]
    abs_key_parent = header + key_parent

    # Every proper prefix of the dotted key, made absolute with the header.
    relative_path_cont_keys = (header + key[:i] for i in range(1, len(key)))
    for cont_key in relative_path_cont_keys:
        # Check that dotted key syntax does not redefine an existing table
        if out.flags.is_(cont_key, Flags.EXPLICIT_NEST):
            raise suffixed_err(src, pos, f"Cannot redefine namespace {cont_key}")
        # Containers in the relative path can't be opened with the table syntax or
        # dotted key/value syntax in following table sections.
        out.flags.add_pending(cont_key, Flags.EXPLICIT_NEST)

    if out.flags.is_(abs_key_parent, Flags.FROZEN):
        raise suffixed_err(
            src, pos, f"Cannot mutate immutable namespace {abs_key_parent}"
        )

    try:
        nest = out.data.get_or_create_nest(abs_key_parent)
    except KeyError:
        raise suffixed_err(src, pos, "Cannot overwrite a value") from None
    if key_stem in nest:
        raise suffixed_err(src, pos, "Cannot overwrite a value")
    # Mark inline table and array namespaces recursively immutable
    if isinstance(value, (dict, list)):
        out.flags.set(header + key, Flags.FROZEN, recursive=True)
    nest[key_stem] = value
    return pos
def parse_key_value_pair(
    src: str, pos: Pos, parse_float: ParseFloat
) -> tuple[Pos, Key, Any]:
    """Parse ``key = value`` at ``pos``; return the new position, key and value.

    Raises the error built by ``suffixed_err`` when the character after the
    key is not ``=`` (including end of input).
    """
    pos, key = parse_key(src, pos)
    try:
        char: str | None = src[pos]
    except IndexError:
        # End of input right after the key.
        char = None
    if char != "=":
        raise suffixed_err(src, pos, "Expected '=' after a key in a key/value pair")
    pos += 1
    pos = skip_chars(src, pos, TOML_WS)
    pos, value = parse_value(src, pos, parse_float)
    return pos, key, value
def parse_key(src: str, pos: Pos) -> tuple[Pos, Key]:
    """Parse a possibly dotted key at ``pos``.

    Returns the position after the key (and any whitespace skipped after
    it) together with the key parts as a tuple.
    """
    pos, key_part = parse_key_part(src, pos)
    key: Key = (key_part,)
    pos = skip_chars(src, pos, TOML_WS)
    while True:
        try:
            char: str | None = src[pos]
        except IndexError:
            char = None
        if char != ".":
            # No further dotted part: the key is complete.
            return pos, key
        pos += 1
        pos = skip_chars(src, pos, TOML_WS)
        pos, key_part = parse_key_part(src, pos)
        key += (key_part,)
        pos = skip_chars(src, pos, TOML_WS)
def parse_key_part(src: str, pos: Pos) -> Tuple[Pos, str]:
def parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]:
try:
char: Optional[str] = src[pos]
char: str | None = src[pos]
except IndexError:
char = None
if char in BARE_KEY_CHARS:
@@ -417,17 +404,17 @@ def parse_key_part(src: str, pos: Pos) -> Tuple[Pos, str]:
raise suffixed_err(src, pos, "Invalid initial character for a key part")
def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line basic string whose opening character is at ``pos``."""
    pos += 1  # Step over the opening character before delegating
    return parse_basic_str(src, pos, multiline=False)
def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> Tuple[Pos, list]:
def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, list]:
pos += 1
array: list = []
pos = skip_comments_and_array_ws(src, pos)
if src[pos : pos + 1] == "]":
if src.startswith("]", pos):
return pos + 1, array
while True:
pos, val = parse_value(src, pos, parse_float)
@@ -442,29 +429,29 @@ def parse_array(src: str, pos: Pos, parse_float: ParseFloat) -> Tuple[Pos, list]
pos += 1
pos = skip_comments_and_array_ws(src, pos)
if src[pos : pos + 1] == "]":
if src.startswith("]", pos):
return pos + 1, array
def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> Tuple[Pos, dict]:
def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> tuple[Pos, dict]:
pos += 1
nested_dict = NestedDict()
flags = Flags()
pos = skip_chars(src, pos, TOML_WS)
if src[pos : pos + 1] == "}":
if src.startswith("}", pos):
return pos + 1, nested_dict.dict
while True:
pos, key, value = parse_key_value_pair(src, pos, parse_float)
key_parent, key_stem = key[:-1], key[-1]
if flags.is_(key, Flags.FROZEN):
raise suffixed_err(src, pos, f"Can not mutate immutable namespace {key}")
raise suffixed_err(src, pos, f"Cannot mutate immutable namespace {key}")
try:
nest = nested_dict.get_or_create_nest(key_parent, access_lists=False)
except KeyError:
raise suffixed_err(src, pos, "Can not overwrite a value")
raise suffixed_err(src, pos, "Cannot overwrite a value") from None
if key_stem in nest:
raise suffixed_err(src, pos, f'Duplicate inline table key "{key_stem}"')
raise suffixed_err(src, pos, f"Duplicate inline table key {key_stem!r}")
nest[key_stem] = value
pos = skip_chars(src, pos, TOML_WS)
c = src[pos : pos + 1]
@@ -480,7 +467,7 @@ def parse_inline_table(src: str, pos: Pos, parse_float: ParseFloat) -> Tuple[Pos
def parse_basic_str_escape(
src: str, pos: Pos, *, multiline: bool = False
) -> Tuple[Pos, str]:
) -> tuple[Pos, str]:
escape_id = src[pos : pos + 2]
pos += 2
if multiline and escape_id in {"\\ ", "\\\t", "\\\n"}:
@@ -488,11 +475,12 @@ def parse_basic_str_escape(
# the doc. Error if non-whitespace is found before newline.
if escape_id != "\\\n":
pos = skip_chars(src, pos, TOML_WS)
char = src[pos : pos + 1]
if not char:
try:
char = src[pos]
except IndexError:
return pos, ""
if char != "\n":
raise suffixed_err(src, pos, 'Unescaped "\\" in a string')
raise suffixed_err(src, pos, "Unescaped '\\' in a string")
pos += 1
pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE)
return pos, ""
@@ -503,18 +491,16 @@ def parse_basic_str_escape(
try:
return pos, BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
except KeyError:
if len(escape_id) != 2:
raise suffixed_err(src, pos, "Unterminated string")
raise suffixed_err(src, pos, 'Unescaped "\\" in a string')
raise suffixed_err(src, pos, "Unescaped '\\' in a string") from None
def parse_basic_str_escape_multiline(src: str, pos: Pos) -> tuple[Pos, str]:
    """Call ``parse_basic_str_escape`` with ``multiline=True``."""
    return parse_basic_str_escape(src, pos, multiline=True)
def parse_hex_char(src: str, pos: Pos, hex_len: int) -> Tuple[Pos, str]:
def parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]:
hex_str = src[pos : pos + hex_len]
if len(hex_str) != hex_len or any(c not in string.hexdigits for c in hex_str):
if len(hex_str) != hex_len or not HEXDIGIT_CHARS.issuperset(hex_str):
raise suffixed_err(src, pos, "Invalid hex value")
pos += hex_len
hex_int = int(hex_str, 16)
@@ -523,7 +509,7 @@ def parse_hex_char(src: str, pos: Pos, hex_len: int) -> Tuple[Pos, str]:
return pos, chr(hex_int)
def parse_literal_str(src: str, pos: Pos) -> Tuple[Pos, str]:
def parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]:
pos += 1 # Skip starting apostrophe
start_pos = pos
pos = skip_until(
@@ -532,9 +518,9 @@ def parse_literal_str(src: str, pos: Pos) -> Tuple[Pos, str]:
return pos + 1, src[start_pos:pos] # Skip ending apostrophe
def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> Tuple[Pos, str]:
def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> tuple[Pos, str]:
pos += 3
if src[pos : pos + 1] == "\n":
if src.startswith("\n", pos):
pos += 1
if literal:
@@ -554,16 +540,16 @@ def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> Tuple[Pos, str]
# Add at maximum two extra apostrophes/quotes if the end sequence
# is 4 or 5 chars long instead of just 3.
if src[pos : pos + 1] != delim:
if not src.startswith(delim, pos):
return pos, result
pos += 1
if src[pos : pos + 1] != delim:
if not src.startswith(delim, pos):
return pos, result + delim
pos += 1
return pos, result + (delim * 2)
def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> Tuple[Pos, str]:
def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
if multiline:
error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS
parse_escapes = parse_basic_str_escape_multiline
@@ -576,11 +562,11 @@ def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> Tuple[Pos, str]:
try:
char = src[pos]
except IndexError:
raise suffixed_err(src, pos, "Unterminated string")
raise suffixed_err(src, pos, "Unterminated string") from None
if char == '"':
if not multiline:
return pos + 1, result + src[start_pos:pos]
if src[pos + 1 : pos + 3] == '""':
if src.startswith('"""', pos):
return pos + 3, result + src[start_pos:pos]
pos += 1
continue
@@ -591,78 +577,40 @@ def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> Tuple[Pos, str]:
start_pos = pos
continue
if char in error_on:
raise suffixed_err(src, pos, f'Illegal character "{char!r}"')
raise suffixed_err(src, pos, f"Illegal character {char!r}")
pos += 1
def parse_regex(src: str, pos: Pos, regex: "Pattern") -> Tuple[Pos, str]:
    """Match ``regex`` at ``pos``; return the match end and the matched text.

    Raises the error built by ``suffixed_err`` when the pattern does not
    match at ``pos``.
    """
    match = regex.match(src, pos)
    if not match:
        raise suffixed_err(src, pos, "Unexpected sequence")
    return match.end(), match.group()
def parse_value( # noqa: C901
src: str, pos: Pos, parse_float: ParseFloat
) -> Tuple[Pos, Any]:
) -> tuple[Pos, Any]:
try:
char: Optional[str] = src[pos]
char: str | None = src[pos]
except IndexError:
char = None
# IMPORTANT: order conditions based on speed of checking and likelihood
# Basic strings
if char == '"':
if src[pos + 1 : pos + 3] == '""':
if src.startswith('"""', pos):
return parse_multiline_str(src, pos, literal=False)
return parse_one_line_basic_str(src, pos)
# Literal strings
if char == "'":
if src[pos + 1 : pos + 3] == "''":
if src.startswith("'''", pos):
return parse_multiline_str(src, pos, literal=True)
return parse_literal_str(src, pos)
# Booleans
if char == "t":
if src[pos + 1 : pos + 4] == "rue":
if src.startswith("true", pos):
return pos + 4, True
if char == "f":
if src[pos + 1 : pos + 5] == "alse":
if src.startswith("false", pos):
return pos + 5, False
# Dates and times
datetime_match = RE_DATETIME.match(src, pos)
if datetime_match:
try:
datetime_obj = match_to_datetime(datetime_match)
except ValueError:
raise suffixed_err(src, pos, "Invalid date or datetime")
return datetime_match.end(), datetime_obj
localtime_match = RE_LOCALTIME.match(src, pos)
if localtime_match:
return localtime_match.end(), match_to_localtime(localtime_match)
# Non-decimal integers
if char == "0":
second_char = src[pos + 1 : pos + 2]
if second_char == "x":
pos, hex_str = parse_regex(src, pos + 2, RE_HEX)
return pos, int(hex_str, 16)
if second_char == "o":
pos, oct_str = parse_regex(src, pos + 2, RE_OCT)
return pos, int(oct_str, 8)
if second_char == "b":
pos, bin_str = parse_regex(src, pos + 2, RE_BIN)
return pos, int(bin_str, 2)
# Decimal integers and "normal" floats.
# The regex will greedily match any type starting with a decimal
# char, so needs to be located after handling of non-decimal ints,
# and dates and times.
number_match = RE_NUMBER.match(src, pos)
if number_match:
return number_match.end(), match_to_number(number_match, parse_float)
# Arrays
if char == "[":
return parse_array(src, pos, parse_float)
@@ -671,6 +619,25 @@ def parse_value( # noqa: C901
if char == "{":
return parse_inline_table(src, pos, parse_float)
# Dates and times
datetime_match = RE_DATETIME.match(src, pos)
if datetime_match:
try:
datetime_obj = match_to_datetime(datetime_match)
except ValueError as e:
raise suffixed_err(src, pos, "Invalid date or datetime") from e
return datetime_match.end(), datetime_obj
localtime_match = RE_LOCALTIME.match(src, pos)
if localtime_match:
return localtime_match.end(), match_to_localtime(localtime_match)
# Integers and "normal" floats.
# The regex will greedily match any type starting with a decimal
# char, so needs to be located after handling of dates and times.
number_match = RE_NUMBER.match(src, pos)
if number_match:
return number_match.end(), match_to_number(number_match, parse_float)
# Special floats
first_three = src[pos : pos + 3]
if first_three in {"inf", "nan"}:
@@ -701,3 +668,24 @@ def suffixed_err(src: str, pos: Pos, msg: str) -> TOMLDecodeError:
def is_unicode_scalar_value(codepoint: int) -> bool:
return (0 <= codepoint <= 55295) or (57344 <= codepoint <= 1114111)
def make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat:
    """A decorator to make `parse_float` safe.

    `parse_float` must not return dicts or lists, because these types
    would be mixed with parsed TOML tables and arrays, thus confusing
    the parser. The returned decorated callable raises `ValueError`
    instead of returning illegal types.
    """
    # The default `float` callable never returns illegal types. Optimize it.
    if parse_float is float:  # type: ignore[comparison-overlap]
        return float

    def safe_parse_float(float_str: str) -> Any:
        float_value = parse_float(float_str)
        if isinstance(float_value, (dict, list)):
            raise ValueError("parse_float must not return dicts or lists")
        return float_value

    return safe_parse_float

View File

@@ -1,37 +1,55 @@
# SPDX-License-Identifier: MIT
# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
# Licensed to PSF under a Contributor Agreement.
from __future__ import annotations
from datetime import date, datetime, time, timedelta, timezone, tzinfo
from functools import lru_cache
import re
from typing import TYPE_CHECKING, Any, Optional, Union
from typing import Any
if TYPE_CHECKING:
from re import Match
from pip._vendor.tomli._parser import ParseFloat
from ._types import ParseFloat
# E.g.
# - 00:32:00.999999
# - 00:32:00
_TIME_RE_STR = r"([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(\.[0-9]+)?"
_TIME_RE_STR = r"([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9])(?:\.([0-9]{1,6})[0-9]*)?"
RE_HEX = re.compile(r"[0-9A-Fa-f](?:_?[0-9A-Fa-f])*")
RE_BIN = re.compile(r"[01](?:_?[01])*")
RE_OCT = re.compile(r"[0-7](?:_?[0-7])*")
RE_NUMBER = re.compile(
r"[+-]?(?:0|[1-9](?:_?[0-9])*)" # integer
+ r"(?:\.[0-9](?:_?[0-9])*)?" # optional fractional part
+ r"(?:[eE][+-]?[0-9](?:_?[0-9])*)?" # optional exponent part
r"""
0
(?:
x[0-9A-Fa-f](?:_?[0-9A-Fa-f])* # hex
|
b[01](?:_?[01])* # bin
|
o[0-7](?:_?[0-7])* # oct
)
|
[+-]?(?:0|[1-9](?:_?[0-9])*) # dec, integer part
(?P<floatpart>
(?:\.[0-9](?:_?[0-9])*)? # optional fractional part
(?:[eE][+-]?[0-9](?:_?[0-9])*)? # optional exponent part
)
""",
flags=re.VERBOSE,
)
RE_LOCALTIME = re.compile(_TIME_RE_STR)
RE_DATETIME = re.compile(
r"([0-9]{4})-(0[1-9]|1[0-2])-(0[1-9]|1[0-9]|2[0-9]|3[01])" # date, e.g. 1988-10-27
+ r"(?:"
+ r"[T ]"
+ _TIME_RE_STR
+ r"(?:(Z)|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))?" # time offset
+ r")?"
rf"""
([0-9]{{4}})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) # date, e.g. 1988-10-27
(?:
[Tt ]
{_TIME_RE_STR}
(?:([Zz])|([+-])([01][0-9]|2[0-3]):([0-5][0-9]))? # optional time offset
)?
""",
flags=re.VERBOSE,
)
def match_to_datetime(match: "Match") -> Union[datetime, date]:
def match_to_datetime(match: re.Match) -> datetime | date:
"""Convert a `RE_DATETIME` match to `datetime.datetime` or `datetime.date`.
Raises ValueError if the match does not correspond to a valid date
@@ -46,7 +64,7 @@ def match_to_datetime(match: "Match") -> Union[datetime, date]:
sec_str,
micros_str,
zulu_time,
offset_dir_str,
offset_sign_str,
offset_hour_str,
offset_minute_str,
) = match.groups()
@@ -54,14 +72,10 @@ def match_to_datetime(match: "Match") -> Union[datetime, date]:
if hour_str is None:
return date(year, month, day)
hour, minute, sec = int(hour_str), int(minute_str), int(sec_str)
micros = int(micros_str[1:].ljust(6, "0")[:6]) if micros_str else 0
if offset_dir_str:
offset_dir = 1 if offset_dir_str == "+" else -1
tz: Optional[tzinfo] = timezone(
timedelta(
hours=offset_dir * int(offset_hour_str),
minutes=offset_dir * int(offset_minute_str),
)
micros = int(micros_str.ljust(6, "0")) if micros_str else 0
if offset_sign_str:
tz: tzinfo | None = cached_tz(
offset_hour_str, offset_minute_str, offset_sign_str
)
elif zulu_time:
tz = timezone.utc
@@ -70,14 +84,24 @@ def match_to_datetime(match: "Match") -> Union[datetime, date]:
return datetime(year, month, day, hour, minute, sec, micros, tzinfo=tz)
def match_to_localtime(match: "Match") -> time:
@lru_cache(maxsize=None)
def cached_tz(hour_str: str, minute_str: str, sign_str: str) -> timezone:
    """Build (and memoize) a fixed-offset ``timezone`` from offset strings.

    ``sign_str`` equal to ``"+"`` gives a positive offset; any other value
    gives a negative one. The sign is applied to both hours and minutes.
    """
    sign = 1 if sign_str == "+" else -1
    return timezone(
        timedelta(
            hours=sign * int(hour_str),
            minutes=sign * int(minute_str),
        )
    )
def match_to_localtime(match: re.Match) -> time:
    """Convert a four-group time match into ``datetime.time``.

    The groups are hour, minute, second and an optional run of fraction
    digits (without the dot). The fraction is right-padded with zeros to
    six digits to obtain microseconds; a missing fraction means 0.
    """
    hour_str, minute_str, sec_str, micros_str = match.groups()
    micros = int(micros_str.ljust(6, "0")) if micros_str else 0
    return time(int(hour_str), int(minute_str), int(sec_str), micros)
def match_to_number(match: re.Match, parse_float: ParseFloat) -> Any:
    """Convert a number match into a Python value.

    When the ``floatpart`` group matched a non-empty string, the whole
    match is passed to ``parse_float``. Otherwise the whole match is
    converted with ``int(..., 0)``, so the base is taken from the literal's
    prefix (``0x``, ``0o``, ``0b`` or none).
    """
    if match.group("floatpart"):
        return parse_float(match.group())
    return int(match.group(), 0)