Change venv

This commit is contained in:
Ambulance Clerc
2023-05-31 08:31:22 +02:00
parent fb6f579089
commit fdbb52c96f
466 changed files with 25899 additions and 64721 deletions

View File

@@ -26,6 +26,8 @@
# 02110-1301 USA
######################### END LICENSE BLOCK #########################
from typing import List, Union
from .charsetprober import CharSetProber
from .enums import ProbingState
@@ -41,6 +43,7 @@ ASV = 6 # accent small vowel
ASO = 7 # accent small other
CLASS_NUM = 8 # total classes
# fmt: off
Latin1_CharToClass = (
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
@@ -91,34 +94,34 @@ Latin1ClassModel = (
0, 3, 1, 3, 1, 1, 1, 3, # ASV
0, 3, 1, 3, 1, 1, 3, 3, # ASO
)
# fmt: on
class Latin1Prober(CharSetProber):
def __init__(self):
super(Latin1Prober, self).__init__()
self._last_char_class = None
self._freq_counter = None
def __init__(self) -> None:
super().__init__()
self._last_char_class = OTH
self._freq_counter: List[int] = []
self.reset()
def reset(self):
def reset(self) -> None:
self._last_char_class = OTH
self._freq_counter = [0] * FREQ_CAT_NUM
CharSetProber.reset(self)
super().reset()
@property
def charset_name(self):
def charset_name(self) -> str:
return "ISO-8859-1"
@property
def language(self):
def language(self) -> str:
return ""
def feed(self, byte_str):
byte_str = self.filter_with_english_letters(byte_str)
def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState:
byte_str = self.remove_xml_tags(byte_str)
for c in byte_str:
char_class = Latin1_CharToClass[c]
freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM)
+ char_class]
freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM) + char_class]
if freq == 0:
self._state = ProbingState.NOT_ME
break
@@ -127,19 +130,18 @@ class Latin1Prober(CharSetProber):
return self.state
def get_confidence(self):
def get_confidence(self) -> float:
if self.state == ProbingState.NOT_ME:
return 0.01
total = sum(self._freq_counter)
if total < 0.01:
confidence = 0.0
else:
confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0)
/ total)
if confidence < 0.0:
confidence = 0.0
confidence = (
0.0
if total < 0.01
else (self._freq_counter[3] - self._freq_counter[1] * 20.0) / total
)
confidence = max(confidence, 0.0)
# lower the confidence of latin1 so that other more accurate
# detector can take priority.
confidence = confidence * 0.73
confidence *= 0.73
return confidence