Change venv

This commit is contained in:
Ambulance Clerc
2023-05-31 08:31:22 +02:00
parent fb6f579089
commit fdbb52c96f
466 changed files with 25899 additions and 64721 deletions

View File

@@ -25,45 +25,46 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from .charsetprober import CharSetProber
from .enums import ProbingState, MachineState
from .codingstatemachine import CodingStateMachine
from .mbcssm import UTF8_SM_MODEL
from typing import Union
from .charsetprober import CharSetProber
from .codingstatemachine import CodingStateMachine
from .enums import MachineState, ProbingState
from .mbcssm import UTF8_SM_MODEL
class UTF8Prober(CharSetProber):
ONE_CHAR_PROB = 0.5
def __init__(self) -> None:
    """Create the prober with a UTF-8 coding state machine and fresh state."""
    super().__init__()
    self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
    # Counter that feed() increments and get_confidence() reads; it starts
    # as an int (not None) so comparisons and exponentiation on it are
    # valid even before the first reset.
    self._num_mb_chars = 0
    self.reset()
def reset(self) -> None:
    """Return the prober to its initial state.

    Resets the parent prober, resets the coding state machine, and zeroes
    the multi-byte character counter.
    """
    # Call the parent reset exactly once (zero-argument super()).
    super().reset()
    self.coding_sm.reset()
    self._num_mb_chars = 0
@property
def charset_name(self) -> str:
    """Return the name of the charset this prober reports: ``"utf-8"``."""
    return "utf-8"
@property
def language(self) -> str:
    """Return the language reported by this prober: always the empty string."""
    return ""
def feed(self, byte_str):
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
for c in byte_str:
coding_state = self.coding_sm.next_state(c)
if coding_state == MachineState.ERROR:
self._state = ProbingState.NOT_ME
break
elif coding_state == MachineState.ITS_ME:
if coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
break
elif coding_state == MachineState.START:
if coding_state == MachineState.START:
if self.coding_sm.get_current_charlen() >= 2:
self._num_mb_chars += 1
@@ -73,10 +74,9 @@ class UTF8Prober(CharSetProber):
return self.state
def get_confidence(self) -> float:
    """Return a confidence value derived from the multi-byte character count.

    Starting from ``unlike = 0.99``:

    * with fewer than 6 counted multi-byte characters, ``unlike`` is scaled
      by ``ONE_CHAR_PROB ** count`` and ``1.0 - unlike`` is returned;
    * otherwise the unscaled ``unlike`` value (0.99) is returned as-is.
    """
    unlike = 0.99
    if self._num_mb_chars < 6:
        # Apply the per-character factor exactly once.
        unlike *= self.ONE_CHAR_PROB**self._num_mb_chars
        return 1.0 - unlike
    return unlike