Update python-twitter to 3.4.1

This commit is contained in:
JonnyWong16 2018-05-23 17:12:19 -07:00
parent 8e4aba7ed4
commit f743a817ba
8 changed files with 1520 additions and 993 deletions

View file

@ -1,13 +1,33 @@
# encoding: utf-8
from __future__ import unicode_literals
import mimetypes
import os
import re
import sys
from tempfile import NamedTemporaryFile
from unicodedata import normalize
try:
from urllib.parse import urlparse
except ImportError:
from urlparse import urlparse
import requests
from tempfile import NamedTemporaryFile
from twitter import TwitterError
import twitter
if sys.version_info < (3,):
range = xrange
if sys.version_info > (3,):
unicode = str
CHAR_RANGES = [
range(0, 4351),
range(8192, 8205),
range(8208, 8223),
range(8242, 8247)]
TLDS = [
"ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq", "ar",
@ -138,7 +158,14 @@ TLDS = [
"淡马锡", "游戏", "点看", "移动", "组织机构", "网址", "网店", "网络", "谷歌", "集团",
"飞利浦", "餐厅", "닷넷", "닷컴", "삼성", "onion"]
URL_REGEXP = re.compile(r'(?i)((?:https?://|www\\.)*(?:[\w+-_]+[.])(?:' + r'\b|'.join(TLDS) + r'\b|(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]))+(?:[:\w+\/]?[a-z0-9!\*\'\(\);:&=\+\$/%#\[\]\-_\.,~?])*)', re.UNICODE)
URL_REGEXP = re.compile((
r'('
r'^(?!(https?://|www\.)?\.|ftps?://|([0-9]+\.){{1,3}}\d+)' # exclude urls that start with "."
r'(?:https?://|www\.)*^(?!.*@)(?:[\w+-_]+[.])' # beginning of url
r'(?:{0}\b' # all tlds
r'(?:[:0-9]))' # port numbers & close off TLDs
r'(?:[\w+\/]?[a-z0-9!\*\'\(\);:&=\+\$/%#\[\]\-_\.,~?])*' # path/query params
r')').format(r'\b|'.join(TLDS)), re.U | re.I | re.X)
def calc_expected_status_length(status, short_url_length=23):
@ -153,12 +180,19 @@ def calc_expected_status_length(status, short_url_length=23):
Expected length of the status message as an integer.
"""
replaced_chars = 0
status_length = len(status)
match = re.findall(URL_REGEXP, status)
if len(match) >= 1:
replaced_chars = len(''.join(match))
status_length = status_length - replaced_chars + (short_url_length * len(match))
status_length = 0
if isinstance(status, bytes):
status = unicode(status)
for word in re.split(r'\s', status):
if is_url(word):
status_length += short_url_length
else:
for character in word:
if any([ord(normalize("NFC", character)) in char_range for char_range in CHAR_RANGES]):
status_length += 1
else:
status_length += 2
status_length += len(re.findall(r'\s', status))
return status_length
@ -171,16 +205,14 @@ def is_url(text):
Returns:
Boolean of whether the text should be treated as a URL or not.
"""
if re.findall(URL_REGEXP, text):
return True
else:
return False
return bool(re.findall(URL_REGEXP, text))
def http_to_file(http):
data_file = NamedTemporaryFile()
req = requests.get(http, stream=True)
data_file.write(req.raw.data)
for chunk in req.iter_content(chunk_size=1024 * 1024):
data_file.write(chunk)
return data_file
@ -200,7 +232,8 @@ def parse_media_file(passed_media):
'image/gif',
'image/bmp',
'image/webp']
video_formats = ['video/mp4']
video_formats = ['video/mp4',
'video/quicktime']
# If passed_media is a string, check if it points to a URL, otherwise,
# it should point to local file. Create a reference to a file obj for
@ -208,7 +241,7 @@ def parse_media_file(passed_media):
if not hasattr(passed_media, 'read'):
if passed_media.startswith('http'):
data_file = http_to_file(passed_media)
filename = os.path.basename(passed_media)
filename = os.path.basename(urlparse(passed_media).path)
else:
data_file = open(os.path.realpath(passed_media), 'rb')
filename = os.path.basename(passed_media)
@ -216,8 +249,8 @@ def parse_media_file(passed_media):
# Otherwise, if a file object was passed in the first place,
# create the standard reference to media_file (i.e., rename it to fp).
else:
if passed_media.mode != 'rb':
raise TwitterError({'message': 'File mode must be "rb".'})
if passed_media.mode not in ['rb', 'rb+', 'w+b']:
raise TwitterError('File mode must be "rb" or "rb+"')
filename = os.path.basename(passed_media.name)
data_file = passed_media
@ -226,16 +259,17 @@ def parse_media_file(passed_media):
try:
data_file.seek(0)
except:
except Exception as e:
pass
media_type = mimetypes.guess_type(os.path.basename(filename))[0]
if media_type in img_formats and file_size > 5 * 1048576:
raise TwitterError({'message': 'Images must be less than 5MB.'})
elif media_type in video_formats and file_size > 15 * 1048576:
raise TwitterError({'message': 'Videos must be less than 15MB.'})
elif media_type not in img_formats and media_type not in video_formats:
raise TwitterError({'message': 'Media type could not be determined.'})
if media_type is not None:
if media_type in img_formats and file_size > 5 * 1048576:
raise TwitterError({'message': 'Images must be less than 5MB.'})
elif media_type in video_formats and file_size > 15 * 1048576:
raise TwitterError({'message': 'Videos must be less than 15MB.'})
elif media_type not in img_formats and media_type not in video_formats:
raise TwitterError({'message': 'Media type could not be determined.'})
return data_file, filename, file_size, media_type
@ -263,3 +297,18 @@ def enf_type(field, _type, val):
raise TwitterError({
'message': '"{0}" must be type {1}'.format(field, _type.__name__)
})
def parse_arg_list(args, attr):
out = []
if isinstance(args, (str, unicode)):
out.append(args)
elif isinstance(args, twitter.User):
out.append(getattr(args, attr))
elif isinstance(args, (list, tuple)):
for item in args:
if isinstance(item, (str, unicode)):
out.append(item)
elif isinstance(item, twitter.User):
out.append(getattr(item, attr))
return ",".join([str(item) for item in out])