From fc16e7c374b8f7e6bdc80afd9be660ad10d60f63 Mon Sep 17 00:00:00 2001 From: Labrys of Knossos Date: Sat, 15 Dec 2018 13:37:57 -0500 Subject: [PATCH] Update rencode to 1.0.6 --- libs/rencode/__init__.py | 12 + libs/{rencode.py => rencode/rencode_orig.py} | 373 +++++++++---------- 2 files changed, 185 insertions(+), 200 deletions(-) create mode 100644 libs/rencode/__init__.py rename libs/{rencode.py => rencode/rencode_orig.py} (57%) diff --git a/libs/rencode/__init__.py b/libs/rencode/__init__.py new file mode 100644 index 00000000..94a54658 --- /dev/null +++ b/libs/rencode/__init__.py @@ -0,0 +1,12 @@ +try: + from rencode._rencode import * + from rencode._rencode import __version__ +except ImportError: + import rencode.rencode_orig + prev_all = rencode.rencode_orig.__all__[:] + del rencode.rencode_orig.__all__ + from rencode.rencode_orig import * + from rencode.rencode_orig import __version__ + rencode.rencode_orig.__all__ = prev_all + +__all__ = ['dumps', 'loads'] diff --git a/libs/rencode.py b/libs/rencode/rencode_orig.py similarity index 57% rename from libs/rencode.py rename to libs/rencode/rencode_orig.py index 85552862..83f0de33 100644 --- a/libs/rencode.py +++ b/libs/rencode/rencode_orig.py @@ -1,35 +1,3 @@ -# coding=utf-8 -""" -rencode -- Web safe object pickling/unpickling. - -Public domain, Connelly Barnes 2006-2007. - -The rencode module is a modified version of bencode from the -BitTorrent project. For complex, heterogeneous data structures with -many small elements, r-encodings take up significantly less space than -b-encodings: - - >>> len(rencode.dumps({'a': 0, 'b': [1, 2], 'c': 99})) - 13 - >>> len(bencode.bencode({'a': 0, 'b': [1, 2], 'c': 99})) - 26 - -The rencode format is not standardized, and may change with different -rencode module versions, so you should check that you are using the -same rencode version throughout your project. -""" - -import struct -from threading import Lock - -from six import PY3 - -if PY3: - long = int - -__version__ = '1.0.1' -__all__ = ['dumps', 'loads'] - # Original bencode module by Petru Paler, et al. # # Modifications by Connelly Barnes: @@ -69,6 +37,45 @@ __all__ = ['dumps', 'loads'] # # (The rencode module is licensed under the above license as well). # +# pylint: disable=redefined-builtin + +""" +rencode -- Web safe object pickling/unpickling. + +Public domain, Connelly Barnes 2006-2007. + +The rencode module is a modified version of bencode from the +BitTorrent project. For complex, heterogeneous data structures with +many small elements, r-encodings take up significantly less space than +b-encodings: + + >>> len(rencode.dumps({'a':0, 'b':[1,2], 'c':99})) + 13 + >>> len(bencode.bencode({'a':0, 'b':[1,2], 'c':99})) + 26 + +The rencode format is not standardized, and may change with different +rencode module versions, so you should check that you are using the +same rencode version throughout your project. +""" + +import struct +import sys +from threading import Lock + +__version__ = ("Python", 1, 0, 6) +__all__ = ('dumps', 'loads') + +py3 = sys.version_info[0] >= 3 +if py3: + long = int + unicode = str + + def int2byte(c): + return bytes([c]) +else: + def int2byte(c): + return chr(c) # Default number of bits for serialized floats, either 32 or 64 (also a parameter for dumps()). DEFAULT_FLOAT_BITS = 32 @@ -78,19 +85,19 @@ MAX_INT_LENGTH = 64 # The bencode 'typecodes' such as i, d, etc have been extended and # relocated on the base-256 character set. -CHR_LIST = chr(59) -CHR_DICT = chr(60) -CHR_INT = chr(61) -CHR_INT1 = chr(62) -CHR_INT2 = chr(63) -CHR_INT4 = chr(64) -CHR_INT8 = chr(65) -CHR_FLOAT32 = chr(66) -CHR_FLOAT64 = chr(44) -CHR_TRUE = chr(67) -CHR_FALSE = chr(68) -CHR_NONE = chr(69) -CHR_TERM = chr(127) +CHR_LIST = int2byte(59) +CHR_DICT = int2byte(60) +CHR_INT = int2byte(61) +CHR_INT1 = int2byte(62) +CHR_INT2 = int2byte(63) +CHR_INT4 = int2byte(64) +CHR_INT8 = int2byte(65) +CHR_FLOAT32 = int2byte(66) +CHR_FLOAT64 = int2byte(44) +CHR_TRUE = int2byte(67) +CHR_FALSE = int2byte(68) +CHR_NONE = int2byte(69) +CHR_TERM = int2byte(127) # Positive integers with value embedded in typecode. INT_POS_FIXED_START = 0 @@ -112,6 +119,9 @@ STR_FIXED_COUNT = 64 LIST_FIXED_START = STR_FIXED_START + STR_FIXED_COUNT LIST_FIXED_COUNT = 64 +# Whether strings should be decoded when loading +_decode_utf8 = False + def decode_int(x, f): f += 1 @@ -122,48 +132,49 @@ def decode_int(x, f): n = int(x[f:newf]) except (OverflowError, ValueError): n = long(x[f:newf]) - if x[f] == '-': - if x[f + 1] == '0': + if x[f:f + 1] == '-': + if x[f + 1:f + 2] == '0': raise ValueError - elif x[f] == '0' and newf != f + 1: + elif x[f:f + 1] == '0' and newf != f + 1: raise ValueError - return n, newf + 1 + return (n, newf + 1) def decode_intb(x, f): f += 1 - return struct.unpack('!b', x[f:f + 1])[0], f + 1 + return (struct.unpack('!b', x[f:f + 1])[0], f + 1) def decode_inth(x, f): f += 1 - return struct.unpack('!h', x[f:f + 2])[0], f + 2 + return (struct.unpack('!h', x[f:f + 2])[0], f + 2) def decode_intl(x, f): f += 1 - return struct.unpack('!l', x[f:f + 4])[0], f + 4 + + return (struct.unpack('!l', x[f:f + 4])[0], f + 4) def decode_intq(x, f): f += 1 - return struct.unpack('!q', x[f:f + 8])[0], f + 8 + return (struct.unpack('!q', x[f:f + 8])[0], f + 8) def decode_float32(x, f): f += 1 n = struct.unpack('!f', x[f:f + 4])[0] - return n, f + 4 + return (n, f + 4) def decode_float64(x, f): f += 1 n = struct.unpack('!d', x[f:f + 8])[0] - return n, f + 8 + return (n, f + 8) def decode_string(x, f): - colon = x.index(':', f) + colon = x.index(b':', f) try: n = int(x[f:colon]) except (OverflowError, ValueError): @@ -172,86 +183,73 @@ def decode_string(x, f): raise ValueError colon += 1 s = x[colon:colon + n] - try: - t = s.decode("utf8") - if len(t) != len(s): - s = t - except UnicodeDecodeError: - pass - return s, colon + n + if _decode_utf8: + s = s.decode('utf8') + return (s, colon + n) def decode_list(x, f): r, f = [], f + 1 - while x[f] != CHR_TERM: - v, f = decode_func[x[f]](x, f) + while x[f:f + 1] != CHR_TERM: + v, f = decode_func[x[f:f + 1]](x, f) r.append(v) - return tuple(r), f + 1 + return (tuple(r), f + 1) def decode_dict(x, f): r, f = {}, f + 1 - while x[f] != CHR_TERM: - k, f = decode_func[x[f]](x, f) - r[k], f = decode_func[x[f]](x, f) - return r, f + 1 + while x[f:f + 1] != CHR_TERM: + k, f = decode_func[x[f:f + 1]](x, f) + r[k], f = decode_func[x[f:f + 1]](x, f) + return (r, f + 1) def decode_true(x, f): - return True, f + 1 + return (True, f + 1) def decode_false(x, f): - return False, f + 1 + return (False, f + 1) def decode_none(x, f): - return None, f + 1 + return (None, f + 1) - -decode_func = { - '0': decode_string, - '1': decode_string, - '2': decode_string, - '3': decode_string, - '4': decode_string, - '5': decode_string, - '6': decode_string, - '7': decode_string, - '8': decode_string, - '9': decode_string, - CHR_LIST: decode_list, - CHR_DICT: decode_dict, - CHR_INT: decode_int, - CHR_INT1: decode_intb, - CHR_INT2: decode_inth, - CHR_INT4: decode_intl, - CHR_INT8: decode_intq, - CHR_FLOAT32: decode_float32, - CHR_FLOAT64: decode_float64, - CHR_TRUE: decode_true, - CHR_FALSE: decode_false, - CHR_NONE: decode_none, -} +decode_func = {} +decode_func[b'0'] = decode_string +decode_func[b'1'] = decode_string +decode_func[b'2'] = decode_string +decode_func[b'3'] = decode_string +decode_func[b'4'] = decode_string +decode_func[b'5'] = decode_string +decode_func[b'6'] = decode_string +decode_func[b'7'] = decode_string +decode_func[b'8'] = decode_string +decode_func[b'9'] = decode_string +decode_func[CHR_LIST] = decode_list +decode_func[CHR_DICT] = decode_dict +decode_func[CHR_INT] = decode_int +decode_func[CHR_INT1] = decode_intb +decode_func[CHR_INT2] = decode_inth +decode_func[CHR_INT4] = decode_intl +decode_func[CHR_INT8] = decode_intq +decode_func[CHR_FLOAT32] = decode_float32 +decode_func[CHR_FLOAT64] = decode_float64 +decode_func[CHR_TRUE] = decode_true +decode_func[CHR_FALSE] = decode_false +decode_func[CHR_NONE] = decode_none def make_fixed_length_string_decoders(): def make_decoder(slen): def f(x, f): s = x[f + 1:f + 1 + slen] - try: - t = s.decode("utf8") - if len(t) != len(s): - s = t - except UnicodeDecodeError: - pass - return s, f + 1 + slen - + if _decode_utf8: + s = s.decode("utf8") + return (s, f + 1 + slen) return f - for i in range(STR_FIXED_COUNT): - decode_func[chr(STR_FIXED_START + i)] = make_decoder(i) - + decode_func[int2byte(STR_FIXED_START + i)] = make_decoder(i) make_fixed_length_string_decoders() @@ -260,16 +258,13 @@ def make_fixed_length_list_decoders(): def make_decoder(slen): def f(x, f): r, f = [], f + 1 - for i in range(slen): - v, f = decode_func[x[f]](x, f) + for _ in range(slen): + v, f = decode_func[x[f:f + 1]](x, f) r.append(v) - return tuple(r), f - + return (tuple(r), f) return f - for i in range(LIST_FIXED_COUNT): - decode_func[chr(LIST_FIXED_START + i)] = make_decoder(i) - + decode_func[int2byte(LIST_FIXED_START + i)] = make_decoder(i) make_fixed_length_list_decoders() @@ -277,15 +272,12 @@ make_fixed_length_list_decoders() def make_fixed_length_int_decoders(): def make_decoder(j): def f(x, f): - return j, f + 1 - + return (j, f + 1) return f - for i in range(INT_POS_FIXED_COUNT): - decode_func[chr(INT_POS_FIXED_START + i)] = make_decoder(i) + decode_func[int2byte(INT_POS_FIXED_START + i)] = make_decoder(i) for i in range(INT_NEG_FIXED_COUNT): - decode_func[chr(INT_NEG_FIXED_START + i)] = make_decoder(-1 - i) - + decode_func[int2byte(INT_NEG_FIXED_START + i)] = make_decoder(-1 - i) make_fixed_length_int_decoders() @@ -294,31 +286,22 @@ def make_fixed_length_dict_decoders(): def make_decoder(slen): def f(x, f): r, f = {}, f + 1 - for j in range(slen): - k, f = decode_func[x[f]](x, f) - r[k], f = decode_func[x[f]](x, f) - return r, f - + for _ in range(slen): + k, f = decode_func[x[f:f + 1]](x, f) + r[k], f = decode_func[x[f:f + 1]](x, f) + return (r, f) return f - for i in range(DICT_FIXED_COUNT): - decode_func[chr(DICT_FIXED_START + i)] = make_decoder(i) - + decode_func[int2byte(DICT_FIXED_START + i)] = make_decoder(i) make_fixed_length_dict_decoders() -def encode_dict(x, r): - r.append(CHR_DICT) - for k, v in x.items(): - encode_func[type(k)](k, r) - encode_func[type(v)](v, r) - r.append(CHR_TERM) - - -def loads(x): +def loads(x, decode_utf8=False): + global _decode_utf8 + _decode_utf8 = decode_utf8 try: - r, l = decode_func[x[0]](x, 0) + r, l = decode_func[x[0:1]](x, 0) except (IndexError, KeyError): raise ValueError if l != len(x): @@ -326,14 +309,11 @@ def loads(x): return r -from types import StringType, IntType, LongType, DictType, ListType, TupleType, FloatType, NoneType, UnicodeType - - def encode_int(x, r): if 0 <= x < INT_POS_FIXED_COUNT: - r.append(chr(INT_POS_FIXED_START + x)) + r.append(int2byte(INT_POS_FIXED_START + x)) elif -INT_NEG_FIXED_COUNT <= x < 0: - r.append(chr(INT_NEG_FIXED_START - 1 - x)) + r.append(int2byte(INT_NEG_FIXED_START - 1 - x)) elif -128 <= x < 128: r.extend((CHR_INT1, struct.pack('!b', x))) elif -32768 <= x < 32768: @@ -344,6 +324,9 @@ def encode_int(x, r): r.extend((CHR_INT8, struct.pack('!q', x))) else: s = str(x) + if py3: + s = bytes(s, "ascii") + if len(s) >= MAX_INT_LENGTH: raise ValueError('overflow') r.extend((CHR_INT, s, CHR_TERM)) @@ -358,18 +341,21 @@ def encode_float64(x, r): def encode_bool(x, r): - r.extend({False: CHR_FALSE, True: CHR_TRUE}[bool(x)]) + r.append({False: CHR_FALSE, True: CHR_TRUE}[bool(x)]) def encode_none(x, r): - r.extend(CHR_NONE) + r.append(CHR_NONE) def encode_string(x, r): if len(x) < STR_FIXED_COUNT: - r.extend((chr(STR_FIXED_START + len(x)), x)) + r.extend((int2byte(STR_FIXED_START + len(x)), x)) else: - r.extend((str(len(x)), ':', x)) + s = str(len(x)) + if py3: + s = bytes(s, "ascii") + r.extend((s, b':', x)) def encode_unicode(x, r): @@ -378,7 +364,7 @@ def encode_unicode(x, r): def encode_list(x, r): if len(x) < LIST_FIXED_COUNT: - r.append(chr(LIST_FIXED_START + len(x))) + r.append(int2byte(LIST_FIXED_START + len(x))) for i in x: encode_func[type(i)](i, r) else: @@ -390,7 +376,7 @@ def encode_list(x, r): def encode_dict(x, r): if len(x) < DICT_FIXED_COUNT: - r.append(chr(DICT_FIXED_START + len(x))) + r.append(int2byte(DICT_FIXED_START + len(x))) for k, v in x.items(): encode_func[type(k)](k, r) encode_func[type(v)](v, r) @@ -401,27 +387,19 @@ def encode_dict(x, r): encode_func[type(v)](v, r) r.append(CHR_TERM) - -encode_func = { - IntType: encode_int, - LongType: encode_int, - StringType: encode_string, - ListType: encode_list, - TupleType: encode_list, - DictType: encode_dict, - NoneType: encode_none, - UnicodeType: encode_unicode, -} +encode_func = {} +encode_func[int] = encode_int +encode_func[long] = encode_int +encode_func[bytes] = encode_string +encode_func[list] = encode_list +encode_func[tuple] = encode_list +encode_func[dict] = encode_dict +encode_func[type(None)] = encode_none +encode_func[unicode] = encode_unicode +encode_func[bool] = encode_bool lock = Lock() -try: - from types import BooleanType - - encode_func[BooleanType] = encode_bool -except ImportError: - pass - def dumps(x, float_bits=DEFAULT_FLOAT_BITS): """ @@ -429,60 +407,55 @@ def dumps(x, float_bits=DEFAULT_FLOAT_BITS): Here float_bits is either 32 or 64. """ - lock.acquire() - try: + with lock: if float_bits == 32: - encode_func[FloatType] = encode_float32 + encode_func[float] = encode_float32 elif float_bits == 64: - encode_func[FloatType] = encode_float64 + encode_func[float] = encode_float64 else: - raise ValueError('Float bits ({0:d}) is not 32 or 64'.format(float_bits)) + raise ValueError('Float bits (%d) is not 32 or 64' % float_bits) r = [] encode_func[type(x)](x, r) - finally: - lock.release() - return ''.join(r) + return b''.join(r) def test(): f1 = struct.unpack('!f', struct.pack('!f', 25.5))[0] f2 = struct.unpack('!f', struct.pack('!f', 29.3))[0] f3 = struct.unpack('!f', struct.pack('!f', -0.6))[0] - L = (({'a': 15, 'bb': f1, 'ccc': f2, '': (f3, (), False, True, '')}, ('a', 10 ** 20), tuple(range(-100000, 100000)), - 'b' * 31, 'b' * 62, 'b' * 64, 2 ** 30, 2 ** 33, 2 ** 62, 2 ** 64, 2 ** 30, 2 ** 33, 2 ** 62, 2 ** 64, False, - False, True, -1, 2, 0),) - assert loads(dumps(L)) == L + ld = (({b'a': 15, b'bb': f1, b'ccc': f2, b'': (f3, (), False, True, b'')}, (b'a', 10**20), + tuple(range(-100000, 100000)), b'b' * 31, b'b' * 62, b'b' * 64, 2**30, 2**33, 2**62, + 2**64, 2**30, 2**33, 2**62, 2**64, False, False, True, -1, 2, 0),) + assert loads(dumps(ld)) == ld d = dict(zip(range(-100000, 100000), range(-100000, 100000))) - d.update({'a': 20, 20: 40, 40: 41, f1: f2, f2: f3, f3: False, False: True, True: False}) - L = (d, {}, {5: 6}, {7: 7, True: 8}, {9: 10, 22: 39, 49: 50, 44: ''}) - assert loads(dumps(L)) == L - L = ('', 'a' * 10, 'a' * 100, 'a' * 1000, 'a' * 10000, 'a' * 100000, 'a' * 1000000, 'a' * 10000000) - assert loads(dumps(L)) == L - L = tuple([dict(zip(range(n), range(n))) for n in range(100)]) + ('b',) - assert loads(dumps(L)) == L - L = tuple([dict(zip(range(n), range(-n, 0))) for n in range(100)]) + ('b',) - assert loads(dumps(L)) == L - L = tuple([tuple(range(n)) for n in range(100)]) + ('b',) - assert loads(dumps(L)) == L - L = tuple(['a' * n for n in range(1000)]) + ('b',) - assert loads(dumps(L)) == L - L = tuple(['a' * n for n in range(1000)]) + (None, True, None) - assert loads(dumps(L)) == L + d.update({b'a': 20, 20: 40, 40: 41, f1: f2, f2: f3, f3: False, False: True, True: False}) + ld = (d, {}, {5: 6}, {7: 7, True: 8}, {9: 10, 22: 39, 49: 50, 44: b''}) + assert loads(dumps(ld)) == ld + ld = (b'', b'a' * 10, b'a' * 100, b'a' * 1000, b'a' * 10000, b'a' * 100000, b'a' * 1000000, b'a' * 10000000) + assert loads(dumps(ld)) == ld + ld = tuple([dict(zip(range(n), range(n))) for n in range(100)]) + (b'b',) + assert loads(dumps(ld)) == ld + ld = tuple([dict(zip(range(n), range(-n, 0))) for n in range(100)]) + (b'b',) + assert loads(dumps(ld)) == ld + ld = tuple([tuple(range(n)) for n in range(100)]) + (b'b',) + assert loads(dumps(ld)) == ld + ld = tuple([b'a' * n for n in range(1000)]) + (b'b',) + assert loads(dumps(ld)) == ld + ld = tuple([b'a' * n for n in range(1000)]) + (None, True, None) + assert loads(dumps(ld)) == ld assert loads(dumps(None)) is None assert loads(dumps({None: None})) == {None: None} assert 1e-10 < abs(loads(dumps(1.1)) - 1.1) < 1e-6 assert 1e-10 < abs(loads(dumps(1.1, 32)) - 1.1) < 1e-6 assert abs(loads(dumps(1.1, 64)) - 1.1) < 1e-12 - assert loads(dumps(u"Hello World!!")) - - + assert loads(dumps("Hello World!!"), decode_utf8=True) try: import psyco - psyco.bind(dumps) psyco.bind(loads) except ImportError: pass + if __name__ == '__main__': test()