Update dnspython-2.2.0

This commit is contained in:
JonnyWong16 2021-10-14 21:36:41 -07:00
parent 4b28040d59
commit 4d62245cf5
No known key found for this signature in database
GPG key ID: B1F1F9807184697A
111 changed files with 9077 additions and 5877 deletions

View file

@ -1,4 +1,6 @@
# Copyright (C) 2003-2007, 2009-2011 Nominum, Inc.
# Copyright (C) Dnspython Contributors, see LICENSE for text of ISC license
# Copyright (C) 2003-2017 Nominum, Inc.
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose with or without fee is hereby granted,
@ -15,24 +17,15 @@
"""Tokenize DNS master file format"""
from io import StringIO
import io
import sys
import dns.exception
import dns.name
import dns.ttl
from ._compat import long, text_type, binary_type
# Characters that end an unquoted token.  While reading a quoted string,
# only the quote character itself acts as a delimiter.
_DELIMITERS = {' ', '\t', '\n', ';', '(', ')', '"'}
_QUOTING_DELIMITERS = {'"'}

# Token type codes.
EOF = 0
EOL = 1
@ -44,32 +37,20 @@ DELIMITER = 6
# The unget buffers (ungotten_char / ungotten_token) hold at most one item;
# a second unget without an intervening get raises this.
class UngetBufferFull(dns.exception.DNSException):
    """An attempt was made to unget a token when the unget buffer was full."""
class Token:
    """A DNS master file format token.

    ttype: The token type
    value: The token value
    has_escape: Does the token value contain escapes?
    """
def __init__(self, ttype, value='', has_escape=False):
    """Initialize a token instance.

    ttype: The token type (one of the module-level token type codes).
    value: The token value; defaults to the empty string.
    has_escape: Does the token value contain escapes?
    """
    self.ttype = ttype
    self.value = value
    self.has_escape = has_escape
@ -92,11 +73,11 @@ class Token(object):
def is_comment(self):
    """Return True if this token is a COMMENT token."""
    return self.ttype == COMMENT

def is_delimiter(self):  # pragma: no cover (we don't return delimiters yet)
    """Return True if this token is a DELIMITER token."""
    return self.ttype == DELIMITER

def is_eol_or_eof(self):
    """Return True if this token is an EOL or EOF token."""
    return self.ttype == EOL or self.ttype == EOF
def __eq__(self, other):
if not isinstance(other, Token):
@ -142,72 +123,120 @@ class Token(object):
unescaped += c
return Token(self.ttype, unescaped)
# compatibility for old-style tuple tokens

def __len__(self):
    """A token acts like a 2-tuple of (ttype, value)."""
    return 2

def __iter__(self):
    """Iterate over the token as (ttype, value)."""
    return iter((self.ttype, self.value))

def __getitem__(self, i):
    """Index 0 is the type, index 1 is the value; anything else errors."""
    if i == 0:
        return self.ttype
    if i == 1:
        return self.value
    raise IndexError
def unescape_to_bytes(self):
    """Apply DNS \\DDD and \\X escapes in the token value and return a
    new Token of the same type whose value is the resulting ``bytes``.

    Raises dns.exception.UnexpectedEnd if the value ends mid-escape.
    Raises dns.exception.SyntaxError if a \\DDD escape is malformed.
    """
    # We used to use unescape() for TXT-like records, but this
    # caused problems as we'd process DNS escapes into Unicode code
    # points instead of byte values, and then a to_text() of the
    # processed data would not equal the original input.  For
    # example, \226 in the TXT record would have a to_text() of
    # \195\162 because we applied UTF-8 encoding to Unicode code
    # point 226.
    #
    # We now apply escapes while converting directly to bytes,
    # avoiding this double encoding.
    #
    # This code also handles cases where the unicode input has
    # non-ASCII code-points in it by converting it to UTF-8.  TXT
    # records aren't defined for Unicode, but this is the best we
    # can do to preserve meaning.  For example,
    #
    #     foo\u200bbar
    #
    # (where \u200b is Unicode code point 0x200b) will be treated
    # as if the input had been the UTF-8 encoding of that string,
    # namely:
    #
    #     foo\226\128\139bar
    #
    unescaped = b''
    l = len(self.value)
    i = 0
    while i < l:
        c = self.value[i]
        i += 1
        if c == '\\':
            # Escape introducer: the next character, or three decimal
            # digits, is consumed as part of the escape.
            if i >= l:
                raise dns.exception.UnexpectedEnd
            c = self.value[i]
            i += 1
            if c.isdigit():
                # \DDD escape: exactly three decimal digits -> one byte.
                if i >= l:
                    raise dns.exception.UnexpectedEnd
                c2 = self.value[i]
                i += 1
                if i >= l:
                    raise dns.exception.UnexpectedEnd
                c3 = self.value[i]
                i += 1
                if not (c2.isdigit() and c3.isdigit()):
                    raise dns.exception.SyntaxError
                unescaped += b'%c' % (int(c) * 100 + int(c2) * 10 + int(c3))
            else:
                # Note that as mentioned above, if c is a Unicode
                # code point outside of the ASCII range, then this
                # += is converting that code point to its UTF-8
                # encoding and appending multiple bytes to
                # unescaped.
                unescaped += c.encode()
        else:
            unescaped += c.encode()
    return Token(self.ttype, bytes(unescaped))
class Tokenizer:
    """A DNS master file format tokenizer.

    A token object is basically a (type, value) tuple.  The valid
    types are EOF, EOL, WHITESPACE, IDENTIFIER, QUOTED_STRING,
    COMMENT, and DELIMITER.

    file: The file to tokenize
    ungotten_char: The most recently ungotten character, or None.
    ungotten_token: The most recently ungotten token, or None.
    multiline: The current multiline level.  This value is increased
    by one every time a '(' delimiter is read, and decreased by one every time
    a ')' delimiter is read.
    quoting: This variable is true if the tokenizer is currently
    reading a quoted string.
    eof: This variable is true if the tokenizer has encountered EOF.
    delimiters: The current delimiter dictionary.
    line_number: The current line number
    filename: A filename that will be returned by the where() method.
    idna_codec: A dns.name.IDNACodec, specifies the IDNA
    encoder/decoder.  If None, the default IDNA 2003
    encoder/decoder is used.
    """
def __init__(self, f=sys.stdin, filename=None):
def __init__(self, f=sys.stdin, filename=None, idna_codec=None):
"""Initialize a tokenizer instance.
@param f: The file to tokenize. The default is sys.stdin.
f: The file to tokenize. The default is sys.stdin.
This parameter may also be a string, in which case the tokenizer
will take its input from the contents of the string.
@type f: file or string
@param filename: the name of the filename that the L{where} method
filename: the name of the filename that the where() method
will return.
@type filename: string
idna_codec: A dns.name.IDNACodec, specifies the IDNA
encoder/decoder. If None, the default IDNA 2003
encoder/decoder is used.
"""
if isinstance(f, text_type):
f = StringIO(f)
if isinstance(f, str):
f = io.StringIO(f)
if filename is None:
filename = '<string>'
elif isinstance(f, binary_type):
f = StringIO(f.decode())
elif isinstance(f, bytes):
f = io.StringIO(f.decode())
if filename is None:
filename = '<string>'
else:
@ -225,10 +254,12 @@ class Tokenizer(object):
self.delimiters = _DELIMITERS
self.line_number = 1
self.filename = filename
if idna_codec is None:
idna_codec = dns.name.IDNA_2003
self.idna_codec = idna_codec
def _get_char(self):
"""Read a character from input.
@rtype: string
"""
if self.ungotten_char is None:
@ -248,7 +279,7 @@ class Tokenizer(object):
def where(self):
"""Return the current location in the input.
@rtype: (string, int) tuple. The first item is the filename of
Returns a (string, int) tuple. The first item is the filename of
the input, the second is the current line number.
"""
@ -261,13 +292,13 @@ class Tokenizer(object):
an error to try to unget a character when the unget buffer is not
empty.
@param c: the character to unget
@type c: string
@raises UngetBufferFull: there is already an ungotten char
c: the character to unget
raises UngetBufferFull: there is already an ungotten char
"""
if self.ungotten_char is not None:
raise UngetBufferFull
# this should never happen!
raise UngetBufferFull # pragma: no cover
self.ungotten_char = c
def skip_whitespace(self):
@ -278,7 +309,7 @@ class Tokenizer(object):
If the tokenizer is in multiline mode, then newlines are whitespace.
@rtype: int
Returns the number of characters skipped.
"""
skipped = 0
@ -293,15 +324,17 @@ class Tokenizer(object):
def get(self, want_leading=False, want_comment=False):
"""Get the next token.
@param want_leading: If True, return a WHITESPACE token if the
want_leading: If True, return a WHITESPACE token if the
first character read is whitespace. The default is False.
@type want_leading: bool
@param want_comment: If True, return a COMMENT token if the
want_comment: If True, return a COMMENT token if the
first token read is a comment. The default is False.
@type want_comment: bool
@rtype: Token object
@raises dns.exception.UnexpectedEnd: input ended prematurely
@raises dns.exception.SyntaxError: input was badly formed
Raises dns.exception.UnexpectedEnd: input ended prematurely
Raises dns.exception.SyntaxError: input was badly formed
Returns a Token.
"""
if self.ungotten_token is not None:
@ -332,7 +365,7 @@ class Tokenizer(object):
self.skip_whitespace()
continue
elif c == ')':
if not self.multiline > 0:
if self.multiline <= 0:
raise dns.exception.SyntaxError
self.multiline -= 1
self.skip_whitespace()
@ -379,23 +412,8 @@ class Tokenizer(object):
else:
self._unget_char(c)
break
elif self.quoting:
if c == '\\':
c = self._get_char()
if c == '':
raise dns.exception.UnexpectedEnd
if c.isdigit():
c2 = self._get_char()
if c2 == '':
raise dns.exception.UnexpectedEnd
c3 = self._get_char()
if c == '':
raise dns.exception.UnexpectedEnd
if not (c2.isdigit() and c3.isdigit()):
raise dns.exception.SyntaxError
c = chr(int(c) * 100 + int(c2) * 10 + int(c3))
elif c == '\n':
raise dns.exception.SyntaxError('newline in quoted string')
elif self.quoting and c == '\n':
raise dns.exception.SyntaxError('newline in quoted string')
elif c == '\\':
#
# It's an escape. Put it and the next character into
@ -420,9 +438,9 @@ class Tokenizer(object):
an error to try to unget a token when the unget buffer is not
empty.
@param token: the token to unget
@type token: Token object
@raises UngetBufferFull: there is already an ungotten token
token: the token to unget
Raises UngetBufferFull: there is already an ungotten token
"""
if self.ungotten_token is not None:
@ -431,7 +449,8 @@ class Tokenizer(object):
def next(self):
"""Return the next item in an iteration.
@rtype: (int, string)
Returns a Token.
"""
token = self.get()
@ -446,11 +465,12 @@ class Tokenizer(object):
# Helpers
def get_int(self, base=10):
    """Read the next token and interpret it as an unsigned integer.

    base: the base of the integer; defaults to 10.

    Raises dns.exception.SyntaxError if not an unsigned integer.

    Returns an int.
    """
    token = self.get().unescape()
    if not token.is_identifier():
        raise dns.exception.SyntaxError('expecting an identifier')
    if not token.value.isdigit():
        raise dns.exception.SyntaxError('expecting an integer')
    return int(token.value, base)
def get_uint8(self):
    """Read the next token and interpret it as an 8-bit unsigned
    integer.

    Raises dns.exception.SyntaxError if not an 8-bit unsigned integer.

    Returns an int.
    """
    value = self.get_int()
    # NOTE(review): range check restored to match the 16/32-bit siblings
    # and the 8-bit error message — confirm against upstream.
    if value < 0 or value > 255:
        raise dns.exception.SyntaxError(
            '%d is not an unsigned 8-bit integer' % value)
    return value
def get_uint16(self, base=10):
    """Read the next token and interpret it as a 16-bit unsigned
    integer.

    base: the base of the integer; defaults to 10.

    Raises dns.exception.SyntaxError if not a 16-bit unsigned integer.

    Returns an int.
    """
    value = self.get_int(base=base)
    if value < 0 or value > 65535:
        if base == 8:
            # Report octal input in octal so the message matches what
            # the user wrote.
            raise dns.exception.SyntaxError(
                '%o is not an octal unsigned 16-bit integer' % value)
        else:
            raise dns.exception.SyntaxError(
                '%d is not an unsigned 16-bit integer' % value)
    return value
def get_uint32(self, base=10):
    """Read the next token and interpret it as a 32-bit unsigned
    integer.

    base: the base of the integer; defaults to 10.

    Raises dns.exception.SyntaxError if not a 32-bit unsigned integer.

    Returns an int.
    """
    # The bound is 4294967295 (2**32 - 1); the old code's
    # "> 4294967296" wrongly admitted 2**32 itself.
    value = self.get_int(base=base)
    if value < 0 or value > 4294967295:
        raise dns.exception.SyntaxError(
            '%d is not an unsigned 32-bit integer' % value)
    return value
def get_string(self, max_length=None):
    """Read the next token and interpret it as a string.

    max_length: the maximum allowed length of the string, or None
    (the default) for no limit.

    Raises dns.exception.SyntaxError if not a string.
    Raises dns.exception.SyntaxError if token value length
    exceeds max_length (if specified).

    Returns a string.
    """
    token = self.get().unescape()
    if not (token.is_identifier() or token.is_quoted_string()):
        raise dns.exception.SyntaxError('expecting a string')
    if max_length and len(token.value) > max_length:
        raise dns.exception.SyntaxError("string too long")
    return token.value
def get_identifier(self):
    """Read the next token, which should be an identifier.

    Raises dns.exception.SyntaxError if not an identifier.

    Returns a string.
    """
    token = self.get().unescape()
    if not token.is_identifier():
        raise dns.exception.SyntaxError('expecting an identifier')
    return token.value
def concatenate_remaining_identifiers(self):
    """Read the remaining tokens on the line, which should be identifiers.

    Raises dns.exception.SyntaxError if a token is seen that is not an
    identifier.

    Returns a string containing a concatenation of the remaining
    identifiers.
    """
    # Collect into a list and join once, instead of repeated string
    # concatenation.
    parts = []
    while True:
        token = self.get().unescape()
        if token.is_eol_or_eof():
            break
        if not token.is_identifier():
            raise dns.exception.SyntaxError
        parts.append(token.value)
    return "".join(parts)
def as_name(self, token, origin=None, relativize=False, relativize_to=None):
    """Try to interpret the token as a DNS name.

    Raises dns.exception.SyntaxError if not a name.

    Returns a dns.name.Name.
    """
    if not token.is_identifier():
        raise dns.exception.SyntaxError('expecting an identifier')
    name = dns.name.from_text(token.value, origin, self.idna_codec)
    # Relativize to relativize_to if given, otherwise to origin.
    return name.choose_relativity(relativize_to or origin, relativize)
def get_name(self, origin=None, relativize=False, relativize_to=None):
    """Read the next token and interpret it as a DNS name.

    Raises dns.exception.SyntaxError if not a name.

    Returns a dns.name.Name.
    """
    # Delegate the actual conversion and relativization to as_name().
    return self.as_name(self.get(), origin, relativize, relativize_to)
def get_eol(self):
"""Read the next token and raise an exception if it isn't EOL or
EOF.
@raises dns.exception.SyntaxError:
@rtype: string
Returns a string.
"""
token = self.get()
@ -558,6 +616,14 @@ class Tokenizer(object):
return token.value
def get_ttl(self):
"""Read the next token and interpret it as a DNS TTL.
Raises dns.exception.SyntaxError or dns.ttl.BadTTL if not an
identifier or badly formed.
Returns an int.
"""
token = self.get().unescape()
if not token.is_identifier():
raise dns.exception.SyntaxError('expecting an identifier')