Update requests to 2.21.0

Also updates: certifi-2018.11.29, chardet-3.0.4, idna-2.8, urllib3-1.24.1
2025-08-14 02:26:53 -07:00 · 2018-12-15 01:05:44 -05:00 · 2018-12-15 01:05:44 -05:00 · 72226bffd8
commit 72226bffd8
parent 367f7f3a61
157 changed files with 22518 additions and 15476 deletions
--- a/libs/chardet/codingstatemachine.py
+++ b/libs/chardet/codingstatemachine.py
@@ -25,37 +25,64 @@
 # 02110-1301  USA
 ######################### END LICENSE BLOCK #########################

-from .constants import eStart
-from .compat import wrap_ord
+import logging
+
+from .enums import MachineState


-class CodingStateMachine:
+class CodingStateMachine(object):
+    """
+    A state machine to verify a byte sequence for a particular encoding. For
+    each byte the detector receives, it will feed that byte to every active
+    state machine available, one byte at a time. The state machine changes its
+    state based on its previous state and the byte it receives. There are 3
+    states in a state machine that are of interest to an auto-detector:
+
+    START state: This is the state to start with, or a legal byte sequence
+                 (i.e. a valid code point) for a character has been identified.
+
+    ME state:  This indicates that the state machine identified a byte sequence
+               that is specific to the charset it is designed for and that
+               there is no other possible encoding which can contain this byte
+               sequence. This will lead to an immediate positive answer for
+               the detector.
+
+    ERROR state: This indicates the state machine identified an illegal byte
+                 sequence for that encoding. This will lead to an immediate
+                 negative answer for this encoding. Detector will exclude this
+                 encoding from consideration from here on.
+    """
    def __init__(self, sm):
-        self._mModel = sm
-        self._mCurrentBytePos = 0
-        self._mCurrentCharLen = 0
+        self._model = sm
+        self._curr_byte_pos = 0
+        self._curr_char_len = 0
+        self._curr_state = None
+        self.logger = logging.getLogger(__name__)
        self.reset()

    def reset(self):
-        self._mCurrentState = eStart
+        self._curr_state = MachineState.START

    def next_state(self, c):
        # for each byte we get its class
        # if it is first byte, we also get byte length
-        # PY3K: aBuf is a byte stream, so c is an int, not a byte
-        byteCls = self._mModel['classTable'][wrap_ord(c)]
-        if self._mCurrentState == eStart:
-            self._mCurrentBytePos = 0
-            self._mCurrentCharLen = self._mModel['charLenTable'][byteCls]
-        # from byte's class and stateTable, we get its next state
-        curr_state = (self._mCurrentState * self._mModel['classFactor']
-                      + byteCls)
-        self._mCurrentState = self._mModel['stateTable'][curr_state]
-        self._mCurrentBytePos += 1
-        return self._mCurrentState
+        byte_class = self._model['class_table'][c]
+        if self._curr_state == MachineState.START:
+            self._curr_byte_pos = 0
+            self._curr_char_len = self._model['char_len_table'][byte_class]
+        # from byte's class and state_table, we get its next state
+        curr_state = (self._curr_state * self._model['class_factor']
+                      + byte_class)
+        self._curr_state = self._model['state_table'][curr_state]
+        self._curr_byte_pos += 1
+        return self._curr_state

    def get_current_charlen(self):
-        return self._mCurrentCharLen
+        return self._curr_char_len

    def get_coding_state_machine(self):
-        return self._mModel['name']
+        return self._model['name']
+
+    @property
+    def language(self):
+        return self._model['language']