plexpy/lib/biplist/__init__.py
2015-02-22 18:32:51 +02:00

803 lines
29 KiB
Python
Executable file

"""biplist -- a library for reading and writing binary property list files.
Binary Property List (plist) files provide a faster and smaller serialization
format for property lists on OS X. This is a library for generating binary
plists which can be read by OS X, iOS, or other clients.
The API models the plistlib API, and will call through to plistlib when
XML serialization or deserialization is required.
To generate plists with UID values, wrap the values with the Uid object. The
value must be an int.
To generate plists with NSData/CFData values, wrap the values with the
Data object. The value must be a byte string (bytes; str on Python 2).
Date values can only be datetime.datetime objects.
The exceptions InvalidPlistException and NotBinaryPlistException may be
thrown to indicate that the data cannot be serialized or deserialized as
a binary plist.
Plist generation example:

    from biplist import *
    from datetime import datetime
    plist = {'aKey': 'aValue',
             '0': 1.322,
             'now': datetime.now(),
             'list': [1, 2, 3],
             'tuple': ('a', 'b', 'c')
             }
    try:
        writePlist(plist, "example.plist")
    except (InvalidPlistException, NotBinaryPlistException) as e:
        print("Something bad happened:", e)

Plist parsing example:

    from biplist import *
    try:
        plist = readPlist("example.plist")
        print(plist)
    except (InvalidPlistException, NotBinaryPlistException) as e:
        print("Not a plist:", e)
"""
import sys
from collections import namedtuple
import datetime
import io
import math
import plistlib
from struct import pack, unpack
from struct import error as struct_error
import sys
import time
# Python 2/3 compatibility shims.
#
# ``unicode`` and ``long`` only exist on Python 2; on Python 3 they are
# aliased to ``str`` and ``int`` so the rest of the module can use one name.
try:
    unicode
except NameError:
    unicode = str
    unicodeEmpty = ''
else:
    unicodeEmpty = r''

try:
    long
except NameError:
    long = int

# ``iteritems(d)`` iterates over the (key, value) pairs of a dict using
# whichever method the running interpreter provides.
if hasattr({}, 'iteritems'):
    def iteritems(x):
        return x.iteritems()
else:
    def iteritems(x):
        return x.items()
# Public API of the module: the names exported by ``from biplist import *``.
__all__ = [
'Uid', 'Data', 'readPlist', 'writePlist', 'readPlistFromString',
'writePlistToString', 'InvalidPlistException', 'NotBinaryPlistException'
]
# Apple uses Jan 1, 2001 (00:00:00 UTC) as a base for all plist date/times.
# The naive datetime is built directly; it is equal to
# datetime.datetime.utcfromtimestamp(978307200) but avoids that call, which
# is deprecated as of Python 3.12.
apple_reference_date = datetime.datetime(2001, 1, 1)
class Uid(int):
    """Integer subclass that marks a value as a plist UID.

    UID values are used in keyed archiving.
    """

    def __repr__(self):
        number = int(self)
        return "Uid(%d)" % number
class Data(bytes):
    """Byte-string subclass that marks a value as plist Data.

    Adds no behaviour of its own; the type alone distinguishes Data values
    from ordinary byte strings.
    """
class InvalidPlistException(Exception):
    """Signals that a plist is malformed or cannot be serialized."""
class NotBinaryPlistException(Exception):
    """Signals that the input was expected to be a binary plist but is not."""
def readPlist(pathOrFile):
    """Read a property list from a path or an open binary file object.

    The input is first parsed as a binary plist.  If it does not carry the
    binary plist header, the stream is rewound and handed to plistlib
    instead, and the result is passed through wrapDataObject().

    pathOrFile -- a path (bytes or unicode string), which is opened and
                  closed here, or a seekable file-like object opened in
                  binary mode, which is left open.

    Returns the deserialized root object.

    Raises InvalidPlistException if the binary plist is malformed or if the
    plistlib fallback fails for any reason.
    """
    didOpen = False
    if isinstance(pathOrFile, (bytes, unicode)):
        pathOrFile = open(pathOrFile, 'rb')
        didOpen = True
    try:
        result = PlistReader(pathOrFile).parse()
    except NotBinaryPlistException:
        # Not a binary plist: fall back to plistlib.  pathOrFile is always a
        # file object at this point (paths were opened above), so it can be
        # rewound and read directly.
        try:
            pathOrFile.seek(0)
            if hasattr(plistlib, 'loads'):
                result = plistlib.loads(pathOrFile.read())
            else:
                result = plistlib.readPlist(pathOrFile)
            result = wrapDataObject(result, for_binary=True)
        except Exception as e:
            raise InvalidPlistException(e)
    finally:
        # Only close what this function opened.
        if didOpen:
            pathOrFile.close()
    return result
def wrapDataObject(o, for_binary=False):
    """Convert Data values between biplist's and plistlib's representations.

    With for_binary false (preparing for plistlib output), biplist Data
    values are wrapped in plistlib.Data on interpreters older than 3.4 and
    left as they are otherwise.  With for_binary true (post-processing
    plistlib output), plistlib.Data values are converted to biplist Data.

    Lists and dicts are processed recursively *in place*; tuples are rebuilt
    as new plain tuples.  Returns the (possibly replaced) object.
    """
    # plistlib.Data no longer exists on Python 3.9+, so look it up defensively
    # instead of referencing the attribute unconditionally.
    plistData = getattr(plistlib, 'Data', None)
    if isinstance(o, Data) and not for_binary:
        if sys.version_info < (3, 4) and plistData is not None:
            o = plistData(o)
    elif for_binary and plistData is not None and isinstance(o, plistData):
        o = Data(o.data)
    elif isinstance(o, tuple):
        o = tuple(wrapDataObject(list(o), for_binary))
    elif isinstance(o, list):
        for i, item in enumerate(o):
            o[i] = wrapDataObject(item, for_binary)
    elif isinstance(o, dict):
        # Only values of existing keys are reassigned, so iterating the dict
        # while updating it is safe.
        for k in o:
            o[k] = wrapDataObject(o[k], for_binary)
    return o
def writePlist(rootObject, pathOrFile, binary=True):
    """Serialize rootObject as a plist to a path or an open file object.

    rootObject -- the object to serialize.
    pathOrFile -- a path (bytes or unicode string) or a file-like object
                  opened for binary writing.
    binary     -- when true (the default) a binary plist is written with
                  PlistWriter; otherwise the object is passed through
                  wrapDataObject() and written by plistlib.

    Returns whatever the underlying writer returns.
    """
    if not binary:
        rootObject = wrapDataObject(rootObject, binary)
        if hasattr(plistlib, "dump"):
            if isinstance(pathOrFile, (bytes, unicode)):
                with open(pathOrFile, 'wb') as f:
                    return plistlib.dump(rootObject, f)
            return plistlib.dump(rootObject, pathOrFile)
        return plistlib.writePlist(rootObject, pathOrFile)

    didOpen = False
    if isinstance(pathOrFile, (bytes, unicode)):
        pathOrFile = open(pathOrFile, 'wb')
        didOpen = True
    try:
        writer = PlistWriter(pathOrFile)
        return writer.writeRoot(rootObject)
    finally:
        # Close the file we opened even if serialization fails.
        if didOpen:
            pathOrFile.close()
def readPlistFromString(data):
    """Parse a plist held in the byte string *data* by delegating to readPlist()."""
    stream = io.BytesIO(data)
    return readPlist(stream)
def writePlistToString(rootObject, binary=True):
    """Serialize rootObject to a plist and return the serialized data.

    With binary true (the default) the binary format is produced by
    PlistWriter; otherwise the object is passed through wrapDataObject() and
    serialized with whichever plistlib function is available.
    """
    if binary:
        buffer = io.BytesIO()
        PlistWriter(buffer).writeRoot(rootObject)
        return buffer.getvalue()

    rootObject = wrapDataObject(rootObject, binary)
    if hasattr(plistlib, "dumps"):
        return plistlib.dumps(rootObject)
    if hasattr(plistlib, "writePlistToBytes"):
        return plistlib.writePlistToBytes(rootObject)
    return plistlib.writePlistToString(rootObject)
def is_stream_binary_plist(stream):
    """Return True if *stream* begins with the binary plist magic bytes.

    The stream is rewound to the start before reading and is left positioned
    just after the bytes that were read.
    """
    stream.seek(0)
    magic = stream.read(7)
    return magic == b'bplist0'
# Fields of the trailer, in the order they are packed and unpacked.
PlistTrailer = namedtuple('PlistTrailer', [
    'offsetSize',
    'objectRefSize',
    'offsetCount',
    'topLevelObjectNumber',
    'offsetTableOffset',
])
# Running byte totals per object kind, maintained by PlistWriter.
PlistByteCounts = namedtuple('PlistByteCounts', [
    'nullBytes',
    'boolBytes',
    'intBytes',
    'realBytes',
    'dateBytes',
    'dataBytes',
    'stringBytes',
    'uidBytes',
    'arrayBytes',
    'setBytes',
    'dictBytes',
])
class PlistReader(object):
    """Decoder for binary plists read from a seekable binary stream.

    Typical use is ``PlistReader(stream).parse()``.  The whole stream is read
    into ``contents``; objects are then decoded by jumping to the positions
    listed in the offset table that the 32-byte trailer describes.
    """
    file = None
    contents = ''
    offsets = None
    trailer = None
    currentOffset = 0

    def __init__(self, fileOrStream):
        """Remember the stream to read; nothing is read until parse()."""
        self.reset()
        self.file = fileOrStream

    def parse(self):
        """Parse the stream and return the root object (see readRoot)."""
        return self.readRoot()

    def reset(self):
        """Discard all state from a previous parse."""
        self.trailer = None
        self.contents = ''
        self.offsets = []
        self.currentOffset = 0

    def readRoot(self):
        """Read the whole stream and decode the top-level object.

        Raises NotBinaryPlistException if the stream lacks the binary plist
        header, and InvalidPlistException if the file is too short or its
        contents cannot be decoded.
        """
        result = None
        self.reset()
        # Get the header, make sure it's a valid file.
        if not is_stream_binary_plist(self.file):
            raise NotBinaryPlistException()
        self.file.seek(0)
        self.contents = self.file.read()
        if len(self.contents) < 32:
            raise InvalidPlistException("File is too short.")
        trailerContents = self.contents[-32:]
        try:
            self.trailer = PlistTrailer._make(unpack("!xxxxxxBBQQQ", trailerContents))
            entrySize = self.trailer.offsetSize
            entryCount = self.trailer.offsetCount
            tableStart = self.trailer.offsetTableOffset
            tableEnd = tableStart + entrySize * entryCount
            # Reject trailers whose offset table cannot fit in the file.  This
            # also bounds the loop below by the file size, so a bogus
            # offsetCount cannot make it spin.
            if entrySize < 1 or tableEnd > len(self.contents):
                raise InvalidPlistException("Invalid offset table.")
            table = self.contents[tableStart:tableEnd]
            for index in range(entryCount):
                begin = entrySize * index
                entry = table[begin:begin + entrySize]
                self.offsets.append(self.getSizedInteger(entry, entrySize))
            self.setCurrentOffsetToObjectNumber(self.trailer.topLevelObjectNumber)
            result = self.readObject()
        except (TypeError, IndexError, ValueError, OverflowError, struct_error) as e:
            # Truncated or corrupt data surfaces as one of these low-level
            # errors (ValueError includes UnicodeDecodeError); report them all
            # as an invalid plist.
            raise InvalidPlistException(e)
        return result

    def setCurrentOffsetToObjectNumber(self, objectNumber):
        """Move the read position to the start of object *objectNumber*."""
        self.currentOffset = self.offsets[objectNumber]

    def readObject(self):
        """Decode and return the object at the current offset.

        The high nibble of the marker byte selects the object kind and the
        low nibble carries a size/count (or, for kind 0, the value itself).
        Raises InvalidPlistException for an unrecognized marker.
        """
        result = None
        tmp_byte = self.contents[self.currentOffset:self.currentOffset+1]
        marker_byte = unpack("!B", tmp_byte)[0]
        kind = (marker_byte >> 4) & 0x0f
        extra = marker_byte & 0x0f
        self.currentOffset += 1

        def proc_extra(extra):
            # A low nibble of 0b1111 means the real size follows as an
            # integer object.
            if extra == 0b1111:
                extra = self.readObject()
            return extra

        # bool, null, or fill byte
        if kind == 0b0000:
            if extra == 0b0000:
                result = None
            elif extra == 0b1000:
                result = False
            elif extra == 0b1001:
                result = True
            elif extra == 0b1111:
                pass  # fill byte
            else:
                raise InvalidPlistException("Invalid object found at offset: %d" % (self.currentOffset - 1))
        # int
        elif kind == 0b0001:
            extra = proc_extra(extra)
            result = self.readInteger(pow(2, extra))
        # real
        elif kind == 0b0010:
            extra = proc_extra(extra)
            result = self.readReal(extra)
        # date
        elif kind == 0b0011 and extra == 0b0011:
            result = self.readDate()
        # data
        elif kind == 0b0100:
            extra = proc_extra(extra)
            result = self.readData(extra)
        # ascii string
        elif kind == 0b0101:
            extra = proc_extra(extra)
            result = self.readAsciiString(extra)
        # Unicode string
        elif kind == 0b0110:
            extra = proc_extra(extra)
            result = self.readUnicode(extra)
        # uid
        elif kind == 0b1000:
            result = self.readUid(extra)
        # array
        elif kind == 0b1010:
            extra = proc_extra(extra)
            result = self.readArray(extra)
        # set
        elif kind == 0b1100:
            extra = proc_extra(extra)
            result = set(self.readArray(extra))
        # dict
        elif kind == 0b1101:
            extra = proc_extra(extra)
            result = self.readDict(extra)
        else:
            raise InvalidPlistException("Invalid object found: {format: %s, extra: %s}" % (bin(kind), bin(extra)))
        return result

    def readInteger(self, byteSize):
        """Read a byteSize-byte integer value and advance past it."""
        start = self.currentOffset
        data = self.contents[start:start + byteSize]
        result = self.getSizedInteger(data, byteSize, as_number=True)
        self.currentOffset = start + byteSize
        return result

    def readReal(self, length):
        """Read a big-endian float of 2**length bytes and advance past it.

        Only 4-byte (length 2) and 8-byte (length 3) reals are supported;
        anything else raises InvalidPlistException.
        """
        to_read = pow(2, length)
        data = self.contents[self.currentOffset:self.currentOffset+to_read]
        if length == 2:  # 4 bytes
            result = unpack('>f', data)[0]
        elif length == 3:  # 8 bytes
            result = unpack('>d', data)[0]
        else:
            raise InvalidPlistException("Unknown real of length %d bytes" % to_read)
        # Advance like every other reader so the position stays consistent.
        self.currentOffset += to_read
        return result

    def readRefs(self, count):
        """Read *count* object references, each objectRefSize bytes wide."""
        refs = []
        refSize = self.trailer.objectRefSize
        for _ in range(count):
            fragment = self.contents[self.currentOffset:self.currentOffset+refSize]
            refs.append(self.getSizedInteger(fragment, len(fragment)))
            self.currentOffset += refSize
        return refs

    def readArray(self, count):
        """Read *count* references and return the list of referenced objects."""
        result = []
        for ref in self.readRefs(count):
            self.setCurrentOffsetToObjectNumber(ref)
            result.append(self.readObject())
        return result

    def readDict(self, count):
        """Read *count* key refs then *count* value refs and build a dict."""
        result = {}
        keys = self.readRefs(count)
        values = self.readRefs(count)
        for i in range(len(keys)):
            self.setCurrentOffsetToObjectNumber(keys[i])
            key = self.readObject()
            self.setCurrentOffsetToObjectNumber(values[i])
            value = self.readObject()
            result[key] = value
        return result

    def readAsciiString(self, length):
        """Read *length* bytes and return them as a byte string."""
        result = unpack("!%ds" % length, self.contents[self.currentOffset:self.currentOffset+length])[0]
        self.currentOffset += length
        return result

    def readUnicode(self, length):
        """Read *length* UTF-16 code units (2 bytes each) and decode them."""
        actual_length = length*2
        data = self.contents[self.currentOffset:self.currentOffset+actual_length]
        self.currentOffset += actual_length
        return data.decode('utf_16_be')

    def readDate(self):
        """Read an 8-byte float of seconds since apple_reference_date."""
        result = unpack(">d", self.contents[self.currentOffset:self.currentOffset+8])[0]
        # Use timedelta to workaround time_t size limitation on 32-bit python.
        result = datetime.timedelta(seconds=result) + apple_reference_date
        self.currentOffset += 8
        return result

    def readData(self, length):
        """Read *length* raw bytes and return them wrapped in Data."""
        result = self.contents[self.currentOffset:self.currentOffset+length]
        self.currentOffset += length
        return Data(result)

    def readUid(self, length):
        """Read a UID stored in length+1 bytes and return it as a Uid."""
        return Uid(self.readInteger(length+1))

    def getSizedInteger(self, data, byteSize, as_number=False):
        """Decode the big-endian integer held in *data*.

        Numbers of 8 bytes are signed integers when they refer to numbers
        (as_number true), but unsigned otherwise.  Raises
        InvalidPlistException for sizes above 16 bytes.
        """
        result = 0
        # 1, 2, and 4 byte integers are unsigned
        if byteSize == 1:
            result = unpack('>B', data)[0]
        elif byteSize == 2:
            result = unpack('>H', data)[0]
        elif byteSize == 4:
            result = unpack('>L', data)[0]
        elif byteSize == 8:
            if as_number:
                result = unpack('>q', data)[0]
            else:
                result = unpack('>Q', data)[0]
        elif byteSize <= 16:
            # Handle odd-sized or integers larger than 8 bytes
            # Don't naively go over 16 bytes, in order to prevent infinite loops.
            if hasattr(int, 'from_bytes'):
                result = int.from_bytes(data, 'big')
            else:
                # bytearray yields ints on every Python version.
                for byte in bytearray(data):
                    result = (result << 8) | byte
        else:
            raise InvalidPlistException("Encountered integer longer than 16 bytes.")
        return result
class HashableWrapper(object):
    """Holds an arbitrary value in ``value`` inside an ordinary object.

    Instances use the default object hashing, so they can be stored in sets
    and used as dict keys even when the held value cannot.
    """

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        boxed = [self.value]
        return "<HashableWrapper: %s>" % boxed
class BoolWrapper(object):
    """Holds a boolean in ``value`` so it has a type distinct from int."""

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        held = self.value
        return "<BoolWrapper: %s>" % held
class FloatWrapper(object):
    """Interned wrapper around a float, exposed as ``value``.

    FloatWrapper(x) returns the same object every time it is called with
    the same float, so wrappers can be compared and hashed by identity.
    """
    # Cache of wrappers, keyed by the 8-byte IEEE-754 encoding of the value.
    _instances = {}

    def __new__(klass, value):
        # Key on the bit pattern rather than on the float itself: 0.0 and
        # -0.0 compare (and hash) equal, so a float-keyed cache would hand
        # back the wrapper for 0.0 when asked for -0.0 and lose the sign.
        key = pack('>d', value)
        wrapper = klass._instances.get(key)
        if wrapper is None:
            wrapper = object.__new__(klass)
            wrapper.value = value
            klass._instances[key] = wrapper
        return wrapper

    def __repr__(self):
        return "<FloatWrapper: %s>" % self.value
class PlistWriter(object):
    """Serializer that writes an object graph as a binary plist.

    Typical use is ``PlistWriter(fileobj).writeRoot(obj)``; the complete
    output is assembled in memory and written with a single write() call.
    """
    header = b'bplist00bybiplist1.0'
    file = None
    byteCounts = None
    trailer = None
    computedUniques = None
    writtenReferences = None
    referencePositions = None
    wrappedTrue = None
    wrappedFalse = None

    def __init__(self, file):
        """Create a writer that writes to *file*, a binary file-like object."""
        self.reset()
        self.file = file
        self.wrappedTrue = BoolWrapper(True)
        self.wrappedFalse = BoolWrapper(False)

    def reset(self):
        """Clear all bookkeeping so a new root object can be written."""
        self.byteCounts = PlistByteCounts(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
        self.trailer = PlistTrailer(0, 0, 0, 0, 0)
        # A set of all the uniques which have been computed.
        self.computedUniques = set()
        # Maps each referenced object to its index in the offset table.
        self.writtenReferences = {}
        # Maps each written object to its byte position in the output.
        self.referencePositions = {}

    def positionOfObjectReference(self, obj):
        """If the given object has been written already, return its
        position in the offset table. Otherwise, return None."""
        return self.writtenReferences.get(obj)

    def writeRoot(self, root):
        """Serialize *root* and write the complete plist to the file.

        Strategy is:
        - write header
        - wrap root object so everything is hashable
        - compute size of objects which will be written
          - need to do this in order to know how large the object refs
            will be in the list/dict/set reference lists
        - write objects
          - keep objects in writtenReferences
          - keep positions of object references in referencePositions
          - write object references with the length computed previously
        - compute object reference length
        - write object reference positions
        - write trailer

        Raises InvalidPlistException if an object cannot be represented.
        """
        output = self.header
        wrapped_root = self.wrapRoot(root)
        self.computeOffsets(wrapped_root, asReference=True, isRoot=True)
        self.trailer = self.trailer._replace(objectRefSize=self.intSize(len(self.computedUniques)))
        (_, output) = self.writeObjectReference(wrapped_root, output)
        output = self.writeObject(wrapped_root, output, setReferencePosition=True)
        # output size at this point is an upper bound on how big the
        # object reference offsets need to be.
        self.trailer = self.trailer._replace(
            offsetSize=self.intSize(len(output)),
            offsetCount=len(self.computedUniques),
            offsetTableOffset=len(output),
            topLevelObjectNumber=0,
        )
        output = self.writeOffsetTable(output)
        output += pack('!xxxxxxBBQQQ', *self.trailer)
        self.file.write(output)

    def wrapRoot(self, root):
        """Return a copy of *root* in which every value is hashable.

        Bools and floats are replaced by their wrapper objects, and
        containers are rebuilt (recursively) inside a HashableWrapper.
        Anything else is returned unchanged.
        """
        if isinstance(root, bool):
            return self.wrappedTrue if root is True else self.wrappedFalse
        elif isinstance(root, float):
            return FloatWrapper(root)
        elif isinstance(root, set):
            return HashableWrapper({self.wrapRoot(value) for value in root})
        elif isinstance(root, dict):
            wrapped = {}
            for key, value in iteritems(root):
                wrapped[self.wrapRoot(key)] = self.wrapRoot(value)
            return HashableWrapper(wrapped)
        elif isinstance(root, list):
            return HashableWrapper([self.wrapRoot(value) for value in root])
        elif isinstance(root, tuple):
            return HashableWrapper(tuple([self.wrapRoot(value) for value in root]))
        else:
            return root

    def incrementByteCount(self, field, incr=1):
        """Add *incr* to the named field of byteCounts."""
        current = getattr(self.byteCounts, field)
        self.byteCounts = self.byteCounts._replace(**{field: current + incr})

    def computeOffsets(self, obj, asReference=False, isRoot=False):
        """Walk *obj*, recording unique objects and per-kind byte counts.

        When asReference is true the object is added to computedUniques, and
        the walk stops early if it is already there.  Raises
        InvalidPlistException for invalid dictionary keys or objects of an
        unsupported type.
        """
        def check_key(key):
            if key is None:
                raise InvalidPlistException('Dictionary keys cannot be null in plists.')
            elif isinstance(key, Data):
                raise InvalidPlistException('Data cannot be dictionary keys in plists.')
            elif not isinstance(key, (bytes, unicode)):
                raise InvalidPlistException('Keys must be strings.')

        def proc_size(size):
            # Sizes above 14 do not fit in the marker nibble and are followed
            # by a separate integer holding the size.
            if size > 0b1110:
                size += self.intSize(size)
            return size

        # If this should be a reference, then we keep a record of it in the
        # uniques table.
        if asReference:
            if obj in self.computedUniques:
                return
            else:
                self.computedUniques.add(obj)

        if obj is None:
            self.incrementByteCount('nullBytes')
        elif isinstance(obj, BoolWrapper):
            self.incrementByteCount('boolBytes')
        elif isinstance(obj, Uid):
            size = self.intSize(obj)
            self.incrementByteCount('uidBytes', incr=1+size)
        elif isinstance(obj, (int, long)):
            size = self.intSize(obj)
            self.incrementByteCount('intBytes', incr=1+size)
        elif isinstance(obj, FloatWrapper):
            size = self.realSize(obj)
            self.incrementByteCount('realBytes', incr=1+size)
        elif isinstance(obj, datetime.datetime):
            self.incrementByteCount('dateBytes', incr=2)
        elif isinstance(obj, Data):
            size = proc_size(len(obj))
            self.incrementByteCount('dataBytes', incr=1+size)
        elif isinstance(obj, (unicode, bytes)):
            size = proc_size(len(obj))
            self.incrementByteCount('stringBytes', incr=1+size)
        elif isinstance(obj, HashableWrapper):
            obj = obj.value
            if isinstance(obj, set):
                size = proc_size(len(obj))
                self.incrementByteCount('setBytes', incr=1+size)
                for value in obj:
                    self.computeOffsets(value, asReference=True)
            elif isinstance(obj, (list, tuple)):
                size = proc_size(len(obj))
                self.incrementByteCount('arrayBytes', incr=1+size)
                for value in obj:
                    self.computeOffsets(value, asReference=True)
            elif isinstance(obj, dict):
                size = proc_size(len(obj))
                self.incrementByteCount('dictBytes', incr=1+size)
                for key, value in iteritems(obj):
                    check_key(key)
                    self.computeOffsets(key, asReference=True)
                    self.computeOffsets(value, asReference=True)
        else:
            raise InvalidPlistException("Unknown object type.")

    def writeObjectReference(self, obj, output):
        """Tries to write an object reference, adding it to the references
        table. Does not write the actual object bytes or set the reference
        position. Returns a tuple of whether the object was a new reference
        (True if it was, False if it already was in the reference table)
        and the new output.
        """
        position = self.positionOfObjectReference(obj)
        if position is None:
            self.writtenReferences[obj] = len(self.writtenReferences)
            output += self.binaryInt(len(self.writtenReferences) - 1, byteSize=self.trailer.objectRefSize)
            return (True, output)
        else:
            output += self.binaryInt(position, byteSize=self.trailer.objectRefSize)
            return (False, output)

    def writeObject(self, obj, output, setReferencePosition=False):
        """Serializes the given object to the output. Returns output.
        If setReferencePosition is True, will set the position the
        object was written.
        """
        def proc_variable_length(kind, length):
            # Lengths above 14 are flagged with 0b1111 in the marker and
            # followed by the length written as an integer object.
            result = b''
            if length > 0b1110:
                result += pack('!B', (kind << 4) | 0b1111)
                result = self.writeObject(length, result)
            else:
                result += pack('!B', (kind << 4) | length)
            return result

        if isinstance(obj, (str, unicode)) and obj == unicodeEmpty:
            # The Apple Plist decoder can't decode a zero length Unicode string.
            obj = b''

        if setReferencePosition:
            self.referencePositions[obj] = len(output)

        if obj is None:
            output += pack('!B', 0b00000000)
        elif isinstance(obj, BoolWrapper):
            if obj.value is False:
                output += pack('!B', 0b00001000)
            else:
                output += pack('!B', 0b00001001)
        elif isinstance(obj, Uid):
            size = self.intSize(obj)
            output += pack('!B', (0b1000 << 4) | size - 1)
            output += self.binaryInt(obj)
        elif isinstance(obj, (int, long)):
            byteSize = self.intSize(obj)
            # byteSize is a power of two; the marker stores its exponent.
            # bit_length() gives it exactly, without floating point.
            exponent = byteSize.bit_length() - 1
            output += pack('!B', (0b0001 << 4) | exponent)
            output += self.binaryInt(obj, as_number=True)
        elif isinstance(obj, FloatWrapper):
            # just use doubles
            output += pack('!B', (0b0010 << 4) | 3)
            output += self.binaryReal(obj)
        elif isinstance(obj, datetime.datetime):
            timestamp = (obj - apple_reference_date).total_seconds()
            output += pack('!B', 0b00110011)
            output += pack('!d', float(timestamp))
        elif isinstance(obj, Data):
            output += proc_variable_length(0b0100, len(obj))
            output += obj
        elif isinstance(obj, unicode):
            byteData = obj.encode('utf_16_be')
            output += proc_variable_length(0b0110, len(byteData)//2)
            output += byteData
        elif isinstance(obj, bytes):
            output += proc_variable_length(0b0101, len(obj))
            output += obj
        elif isinstance(obj, HashableWrapper):
            obj = obj.value
            if isinstance(obj, (set, list, tuple)):
                if isinstance(obj, set):
                    output += proc_variable_length(0b1100, len(obj))
                else:
                    output += proc_variable_length(0b1010, len(obj))

                # All references first, then the bodies of the new objects.
                objectsToWrite = []
                for objRef in obj:
                    (isNew, output) = self.writeObjectReference(objRef, output)
                    if isNew:
                        objectsToWrite.append(objRef)
                for objRef in objectsToWrite:
                    output = self.writeObject(objRef, output, setReferencePosition=True)
            elif isinstance(obj, dict):
                output += proc_variable_length(0b1101, len(obj))
                keys = []
                values = []
                objectsToWrite = []
                for key, value in iteritems(obj):
                    keys.append(key)
                    values.append(value)
                # Key references, then value references, then object bodies.
                for key in keys:
                    (isNew, output) = self.writeObjectReference(key, output)
                    if isNew:
                        objectsToWrite.append(key)
                for value in values:
                    (isNew, output) = self.writeObjectReference(value, output)
                    if isNew:
                        objectsToWrite.append(value)
                for objRef in objectsToWrite:
                    output = self.writeObject(objRef, output, setReferencePosition=True)
        return output

    def writeOffsetTable(self, output):
        """Writes all of the object reference offsets.

        Raises InvalidPlistException if a referenced object was never
        written.
        """
        writtenReferences = sorted(self.writtenReferences.items(), key=lambda x: x[1])
        for obj, order in writtenReferences:
            # Porting note: Elsewhere we deliberately replace empty unicode strings
            # with empty binary strings, but the empty unicode string
            # goes into writtenReferences.  This isn't an issue in Py2
            # because u'' and b'' have the same hash; but it is in
            # Py3, where they don't.
            if bytes != str and obj == unicodeEmpty:
                obj = b''
            position = self.referencePositions.get(obj)
            if position is None:
                raise InvalidPlistException("Error while writing offsets table. Object not found. %s" % obj)
            output += self.binaryInt(position, self.trailer.offsetSize)
        return output

    def binaryReal(self, obj):
        """Return the 8-byte big-endian double encoding of a FloatWrapper."""
        # just use doubles
        return pack('>d', obj.value)

    def binaryInt(self, obj, byteSize=None, as_number=False):
        """Return *obj* encoded as a big-endian integer.

        byteSize  -- width in bytes; defaults to intSize(obj).  Widths other
                     than 1, 2, 4 and 8 (up to 16) produce a 16-byte value
                     whose upper 8 bytes are zero.
        as_number -- when true, 8-byte values are encoded as signed.

        Raises InvalidPlistException if the width exceeds 16 bytes or the
        value does not fit in the chosen width.
        """
        if byteSize is None:
            byteSize = self.intSize(obj)
        if byteSize > 16:
            raise InvalidPlistException("Core Foundation can't handle integers with size greater than 16 bytes.")
        try:
            if byteSize == 1:
                return pack('>B', obj)
            elif byteSize == 2:
                return pack('>H', obj)
            elif byteSize == 4:
                return pack('>L', obj)
            elif byteSize == 8:
                return pack('>q' if as_number else '>Q', obj)
            else:
                return pack('>Q', 0) + pack('>Q', obj)
        except struct_error as e:
            # Report out-of-range values the same way for every width.
            raise InvalidPlistException("Unable to pack integer %d: %s" % (obj, e))

    def intSize(self, obj):
        """Returns the number of bytes necessary to store the given integer."""
        # SIGNED
        if obj < 0:  # Signed integer, always 8 bytes
            return 8
        # UNSIGNED
        elif obj <= 0xFF:  # 1 byte
            return 1
        elif obj <= 0xFFFF:  # 2 bytes
            return 2
        elif obj <= 0xFFFFFFFF:  # 4 bytes
            return 4
        # SIGNED
        # 0x7FFFFFFFFFFFFFFF is the max.
        elif obj <= 0x7FFFFFFFFFFFFFFF:  # 8 bytes signed
            return 8
        elif obj <= 0xffffffffffffffff:  # 8 bytes unsigned
            return 16
        else:
            raise InvalidPlistException("Core Foundation can't handle integers with size greater than 8 bytes.")

    def realSize(self, obj):
        """Return the byte size used for reals (always 8: doubles only)."""
        return 8