Update python-twitter to 3.4.1

2025-07-11 15:56:07 -07:00 · 2018-05-23 17:12:19 -07:00 · 2018-05-23 17:12:19 -07:00 · f743a817ba
commit f743a817ba
parent 8e4aba7ed4
8 changed files with 1520 additions and 993 deletions
--- a/lib/twitter/twitter_utils.py
+++ b/lib/twitter/twitter_utils.py
@@ -1,13 +1,33 @@
 # encoding: utf-8
+from __future__ import unicode_literals
+
 import mimetypes
 import os
 import re
+import sys
+from tempfile import NamedTemporaryFile
+from unicodedata import normalize
+
+try:
+    from urllib.parse import urlparse
+except ImportError:
+    from urlparse import urlparse

 import requests
-from tempfile import NamedTemporaryFile
-
 from twitter import TwitterError
+import twitter

+if sys.version_info < (3,):
+    range = xrange
+
+if sys.version_info > (3,):
+    unicode = str
+
+CHAR_RANGES = [
+    range(0, 4351),
+    range(8192, 8205),
+    range(8208, 8223),
+    range(8242, 8247)]

 TLDS = [
    "ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq", "ar",
@@ -138,7 +158,14 @@ TLDS = [
    "淡马锡", "游戏", "点看", "移动", "组织机构", "网址", "网店", "网络", "谷歌", "集团",
    "飞利浦", "餐厅", "닷넷", "닷컴", "삼성", "onion"]

-URL_REGEXP = re.compile(r'(?i)((?:https?://|www\\.)*(?:[\w+-_]+[.])(?:' + r'\b|'.join(TLDS) + r'\b|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))+(?:[:\w+\/]?[a-z0-9!\*\'\(\);:&=\+\$/%#\[\]\-_\.,~?])*)', re.UNICODE)
+URL_REGEXP = re.compile((
+    r'('
+    r'^(?!(https?://|www\.)?\.|ftps?://|([0-9]+\.){{1,3}}\d+)'  # exclude urls that start with "."
+    r'(?:https?://|www\.)*^(?!.*@)(?:[\w+-_]+[.])'              # beginning of url
+    r'(?:{0}\b'                                                # all tlds
+    r'(?:[:0-9]))'                                              # port numbers & close off TLDs
+    r'(?:[\w+\/]?[a-z0-9!\*\'\(\);:&=\+\$/%#\[\]\-_\.,~?])*'    # path/query params
+    r')').format(r'\b|'.join(TLDS)), re.U | re.I | re.X)


 def calc_expected_status_length(status, short_url_length=23):
@@ -153,12 +180,19 @@ def calc_expected_status_length(status, short_url_length=23):
        Expected length of the status message as an integer.

    """
-    replaced_chars = 0
-    status_length = len(status)
-    match = re.findall(URL_REGEXP, status)
-    if len(match) >= 1:
-        replaced_chars = len(''.join(match))
-    status_length = status_length - replaced_chars + (short_url_length * len(match))
+    status_length = 0
+    if isinstance(status, bytes):
+        status = unicode(status)
+    for word in re.split(r'\s', status):
+        if is_url(word):
+            status_length += short_url_length
+        else:
+            for character in word:
+                if any([ord(normalize("NFC", character)) in char_range for char_range in CHAR_RANGES]):
+                    status_length += 1
+                else:
+                    status_length += 2
+    status_length += len(re.findall(r'\s', status))
    return status_length


@@ -171,16 +205,14 @@ def is_url(text):
    Returns:
        Boolean of whether the text should be treated as a URL or not.
    """
-    if re.findall(URL_REGEXP, text):
-        return True
-    else:
-        return False
+    return bool(re.findall(URL_REGEXP, text))


 def http_to_file(http):
    data_file = NamedTemporaryFile()
    req = requests.get(http, stream=True)
-    data_file.write(req.raw.data)
+    for chunk in req.iter_content(chunk_size=1024 * 1024):
+        data_file.write(chunk)
    return data_file


@@ -200,7 +232,8 @@ def parse_media_file(passed_media):
                   'image/gif',
                   'image/bmp',
                   'image/webp']
-    video_formats = ['video/mp4']
+    video_formats = ['video/mp4',
+                     'video/quicktime']

    # If passed_media is a string, check if it points to a URL, otherwise,
    # it should point to local file. Create a reference to a file obj for
@@ -208,7 +241,7 @@ def parse_media_file(passed_media):
    if not hasattr(passed_media, 'read'):
        if passed_media.startswith('http'):
            data_file = http_to_file(passed_media)
-            filename = os.path.basename(passed_media)
+            filename = os.path.basename(urlparse(passed_media).path)
        else:
            data_file = open(os.path.realpath(passed_media), 'rb')
            filename = os.path.basename(passed_media)
@@ -216,8 +249,8 @@ def parse_media_file(passed_media):
    # Otherwise, if a file object was passed in the first place,
    # create the standard reference to media_file (i.e., rename it to fp).
    else:
-        if passed_media.mode != 'rb':
-            raise TwitterError({'message': 'File mode must be "rb".'})
+        if passed_media.mode not in ['rb', 'rb+', 'w+b']:
+            raise TwitterError('File mode must be "rb" or "rb+"')
        filename = os.path.basename(passed_media.name)
        data_file = passed_media

@@ -226,16 +259,17 @@ def parse_media_file(passed_media):

    try:
        data_file.seek(0)
-    except:
+    except Exception as e:
        pass

    media_type = mimetypes.guess_type(os.path.basename(filename))[0]
-    if media_type in img_formats and file_size > 5 * 1048576:
-        raise TwitterError({'message': 'Images must be less than 5MB.'})
-    elif media_type in video_formats and file_size > 15 * 1048576:
-        raise TwitterError({'message': 'Videos must be less than 15MB.'})
-    elif media_type not in img_formats and media_type not in video_formats:
-        raise TwitterError({'message': 'Media type could not be determined.'})
+    if media_type is not None:
+        if media_type in img_formats and file_size > 5 * 1048576:
+            raise TwitterError({'message': 'Images must be less than 5MB.'})
+        elif media_type in video_formats and file_size > 15 * 1048576:
+            raise TwitterError({'message': 'Videos must be less than 15MB.'})
+        elif media_type not in img_formats and media_type not in video_formats:
+            raise TwitterError({'message': 'Media type could not be determined.'})

    return data_file, filename, file_size, media_type

@@ -263,3 +297,18 @@ def enf_type(field, _type, val):
        raise TwitterError({
            'message': '"{0}" must be type {1}'.format(field, _type.__name__)
        })
+
+
+def parse_arg_list(args, attr):
+    out = []
+    if isinstance(args, (str, unicode)):
+        out.append(args)
+    elif isinstance(args, twitter.User):
+        out.append(getattr(args, attr))
+    elif isinstance(args, (list, tuple)):
+        for item in args:
+            if isinstance(item, (str, unicode)):
+                out.append(item)
+            elif isinstance(item, twitter.User):
+                out.append(getattr(item, attr))
+    return ",".join([str(item) for item in out])