# SECUREAUTH LABS. Copyright 2018 SecureAuth Corporation. All rights reserved.
#
# This software is provided under a slightly modified version
# of the Apache Software License. See the accompanying LICENSE file
# for more information.
#
# Description:
#   Microsoft Extensible Storage Engine parser, just focused on trying
#   to parse NTDS.dit files (not meant as a full parser, although it might work).
#
# Author:
#   Alberto Solino (@agsolino)
#
# Reference for:
#   Structure.
#
# Excellent reference done by Joachim Metz
#   http://forensic-proof.com/wp-content/uploads/2011/07/Extensible-Storage-Engine-ESE-Database-File-EDB-format.pdf
#
# ToDo:
# [ ] Parse multi-values properly
# [ ] Support long values properly
from __future__ import division
from __future__ import print_function

from impacket import LOG

try:
    from collections import OrderedDict
except ImportError:
    try:
        from ordereddict.ordereddict import OrderedDict
    except ImportError:
        from ordereddict import OrderedDict

from impacket.structure import Structure, hexdump
from struct import unpack
from binascii import hexlify
from six import b


# Constants

FILE_TYPE_DATABASE       = 0
FILE_TYPE_STREAMING_FILE = 1

# Database state
JET_dbstateJustCreated    = 1
JET_dbstateDirtyShutdown  = 2
JET_dbstateCleanShutdown  = 3
JET_dbstateBeingConverted = 4
JET_dbstateForceDetach    = 5

# Page Flags
FLAGS_ROOT         = 1
FLAGS_LEAF         = 2
FLAGS_PARENT       = 4
FLAGS_EMPTY        = 8
FLAGS_SPACE_TREE   = 0x20
FLAGS_INDEX        = 0x40
FLAGS_LONG_VALUE   = 0x80
FLAGS_NEW_FORMAT   = 0x2000
FLAGS_NEW_CHECKSUM = 0x2000

# Tag Flags
TAG_UNKNOWN = 0x1
TAG_DEFUNCT = 0x2
TAG_COMMON  = 0x4

# Fixed Page Numbers
DATABASE_PAGE_NUMBER       = 1
CATALOG_PAGE_NUMBER        = 4
CATALOG_BACKUP_PAGE_NUMBER = 24

# Fixed FatherDataPages
DATABASE_FDP       = 1
CATALOG_FDP        = 2
CATALOG_BACKUP_FDP = 3

# Catalog Types
CATALOG_TYPE_TABLE      = 1
CATALOG_TYPE_COLUMN     = 2
CATALOG_TYPE_INDEX      = 3
CATALOG_TYPE_LONG_VALUE = 4
CATALOG_TYPE_CALLBACK   = 5

# Column Types
JET_coltypNil          = 0
JET_coltypBit          = 1
JET_coltypUnsignedByte = 2
JET_coltypShort        = 3
JET_coltypLong         = 4
JET_coltypCurrency     = 5
JET_coltypIEEESingle   = 6
JET_coltypIEEEDouble   = 7
JET_coltypDateTime     = 8
JET_coltypBinary       = 9
JET_coltypText         = 10
JET_coltypLongBinary   = 11
JET_coltypLongText     = 12
JET_coltypSLV          = 13
JET_coltypUnsignedLong = 14
JET_coltypLongLong     = 15
JET_coltypGUID         = 16
JET_coltypUnsignedShort= 17
JET_coltypMax          = 18

ColumnTypeToName = {
    JET_coltypNil          : 'NULL',
    JET_coltypBit          : 'Boolean',
    JET_coltypUnsignedByte : 'Signed byte',
    JET_coltypShort        : 'Signed short',
    JET_coltypLong         : 'Signed long',
    JET_coltypCurrency     : 'Currency',
    JET_coltypIEEESingle   : 'Single precision FP',
    JET_coltypIEEEDouble   : 'Double precision FP',
    JET_coltypDateTime     : 'DateTime',
    JET_coltypBinary       : 'Binary',
    JET_coltypText         : 'Text',
    JET_coltypLongBinary   : 'Long Binary',
    JET_coltypLongText     : 'Long Text',
    JET_coltypSLV          : 'Obsolete',
    JET_coltypUnsignedLong : 'Unsigned long',
    JET_coltypLongLong     : 'Long long',
    JET_coltypGUID         : 'GUID',
    JET_coltypUnsignedShort: 'Unsigned short',
    JET_coltypMax          : 'Max',
}

ColumnTypeSize = {
    JET_coltypNil          : None,
    JET_coltypBit          : (1,'B'),
    JET_coltypUnsignedByte : (1,'B'),
    JET_coltypShort        : (2,'<h'),
    JET_coltypLong         : (4,'<l'),
    JET_coltypCurrency     : (8,'<Q'),
    JET_coltypIEEESingle   : (4,'<f'),
    JET_coltypIEEEDouble   : (8,'<d'),
    JET_coltypDateTime     : (8,'<Q'),
    JET_coltypBinary       : None,
    JET_coltypText         : None,
    JET_coltypLongBinary   : None,
    JET_coltypLongText     : None,
    JET_coltypSLV          : None,
    JET_coltypUnsignedLong : (4,'<L'),
    JET_coltypLongLong     : (8,'<Q'),
    JET_coltypGUID         : (16,'16s'),
    JET_coltypUnsignedShort: (2,'<H'),
    JET_coltypMax          : None,
}

# Tagged Data Type Flags
TAGGED_DATA_TYPE_VARIABLE_SIZE = 1
TAGGED_DATA_TYPE_COMPRESSED    = 2
TAGGED_DATA_TYPE_STORED        = 4
TAGGED_DATA_TYPE_MULTI_VALUE   = 8
TAGGED_DATA_TYPE_WHO_KNOWS     = 10

# Code pages
CODEPAGE_UNICODE = 1200
CODEPAGE_ASCII   = 20127
CODEPAGE_WESTERN = 1252

StringCodePages = {
    CODEPAGE_UNICODE : 'utf-16le',
    CODEPAGE_ASCII   : 'ascii',
    CODEPAGE_WESTERN : 'cp1252',
}
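# A quick note on the field format strings used by the ESENT_* Structure subclasses
# below (roughly, the impacket Structure mini-language; see impacket/structure.py):
# '<L=0' / '<H=0' / '<Q=0' are little-endian 32/16/64-bit unsigned integers with a
# default value, 'B' is a single byte, ':' takes the remaining bytes as-is, a format
# starting with '"' is a literal constant that must match, and a ('_-Name',
# 'expression') pair computes the length of the 'Name' field that follows it.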
# Structures

class ESENT_DB_HEADER(Structure):
    # Database file header, as documented in Joachim Metz's ESE format reference.
    # Only Version, FileFormatRevision, DBState and PageSize are used by this module;
    # the remaining fields are parsed for completeness.
    structure = (
        ('CheckSum','<L=0'),
        ('Signature','"\xef\xcd\xab\x89'),
        ('Version','<L=0'),
        ('FileType','<L=0'),
        ('DBTime','<Q=0'),
        ('DBSignature','28s=b""'),
        ('DBState','<L=0'),
        ('ConsistentPosition','<Q=0'),
        ('ConsistentTime','<Q=0'),
        ('AttachTime','<Q=0'),
        ('AttachPosition','<Q=0'),
        ('DetachTime','<Q=0'),
        ('DetachPosition','<Q=0'),
        ('LogSignature','28s=b""'),
        ('Unknown','<L=0'),
        ('PreviousBackup','24s=b""'),
        ('PreviousIncBackup','24s=b""'),
        ('CurrentFullBackup','24s=b""'),
        ('ShadowingDisables','<L=0'),
        ('LastObjectID','<L=0'),
        ('WindowsMajorVersion','<L=0'),
        ('WindowsMinorVersion','<L=0'),
        ('WindowsBuildNumber','<L=0'),
        ('WindowsServicePackNumber','<L=0'),
        ('FileFormatRevision','<L=0'),
        ('PageSize','<L=0'),
        ('RepairCount','<L=0'),
        ('RepairTime','<Q=0'),
        ('Unknown2','28s=b""'),
        ('ScrubTime','<Q=0'),
        ('RequiredLog','<Q=0'),
        ('UpgradeExchangeFormat','<L=0'),
        ('UpgradeFreePages','<L=0'),
        ('UpgradeSpaceMapPages','<L=0'),
        ('CurrentShadowBackup','24s=b""'),
        ('CreationFileFormatVersion','<L=0'),
        ('CreationFileFormatRevision','<L=0'),
        ('Unknown3','16s=b""'),
        ('OldRepairCount','<L=0'),
        ('ECCCount','<L=0'),
        ('LastECCTime','<Q=0'),
        ('OldECCFixSuccessCount','<L=0'),
        ('ECCFixErrorCount','<L=0'),
        ('LastECCFixErrorTime','<Q=0'),
        ('OldECCFixErrorCount','<L=0'),
        ('BadCheckSumErrorCount','<L=0'),
        ('LastBadCheckSumTime','<Q=0'),
        ('OldCheckSumErrorCount','<L=0'),
        ('CommittedLog','<L=0'),
        ('PreviousShadowCopy','24s=b""'),
        ('PreviousDifferentialBackup','24s=b""'),
        ('Unknown4','40s=b""'),
        ('NLSMajorVersion','<L=0'),
        ('NLSMinorVersion','<L=0'),
        ('Unknown5','148s=b""'),
        ('UnknownFlags','<L=0'),
    )

class ESENT_PAGE_HEADER(Structure):
    # The page header layout depends on the database version/revision and page size
    structure_2003_SP0 = (
        ('CheckSum','<L=0'),
        ('PageNumber','<L=0'),
    )

    structure_0x620_0x0b = (
        ('CheckSum','<L=0'),
        ('ECCCheckSum','<L=0'),
    )

    structure_win7 = (
        ('CheckSum','<Q=0'),
    )

    common = (
        ('LastModificationTime','<Q=0'),
        ('PreviousPageNumber','<L=0'),
        ('NextPageNumber','<L=0'),
        ('FatherDataPage','<L=0'),
        ('AvailableDataSize','<H=0'),
        ('AvailableUncommittedDataSize','<H=0'),
        ('FirstAvailableDataOffset','<H=0'),
        ('FirstAvailablePageTag','<H=0'),
        ('PageFlags','<L=0'),
    )

    extended_win7 = (
        ('ExtendedCheckSum1','<Q=0'),
        ('ExtendedCheckSum2','<Q=0'),
        ('ExtendedCheckSum3','<Q=0'),
        ('PageNumber','<Q=0'),
        ('Unknown','<Q=0'),
    )

    def __init__(self, version, revision, pageSize=8192, data=None):
        if (version < 0x620) or (version == 0x620 and revision < 0x0b):
            # For sure the old format
            self.structure = self.structure_2003_SP0 + self.common
        elif version == 0x620 and revision < 0x11:
            # Exchange 2003 SP1 / Windows Vista and later
            self.structure = self.structure_0x620_0x0b + self.common
        else:
            # Windows 7 and later
            self.structure = self.structure_win7 + self.common
            if pageSize > 8192:
                self.structure += self.extended_win7

        Structure.__init__(self,data)

class ESENT_ROOT_HEADER(Structure):
    structure = (
        ('InitialNumberOfPages','<L=0'),
        ('ParentFatherDataPage','<L=0'),
        ('ExtentSpace','<L=0'),
        ('SpaceTreePageNumber','<L=0'),
    )

class ESENT_BRANCH_HEADER(Structure):
    structure = (
        ('CommonPageKey',':'),
    )

class ESENT_BRANCH_ENTRY(Structure):
    common = (
        ('CommonPageKeySize','<H=0'),
    )
    structure = (
        ('LocalPageKeySize','<H=0'),
        ('_-LocalPageKey','self["LocalPageKeySize"]'),
        ('LocalPageKey',':'),
        ('ChildPageNumber','<L=0'),
    )

    def __init__(self, flags, data):
        if flags & TAG_COMMON > 0:
            # Include the common header
            self.structure = self.common + self.structure
        Structure.__init__(self,data)

class ESENT_LEAF_HEADER(Structure):
    structure = (
        ('CommonPageKey',':'),
    )

class ESENT_LEAF_ENTRY(Structure):
    common = (
        ('CommonPageKeySize','<H=0'),
    )
    structure = (
        ('LocalPageKeySize','<H=0'),
        ('_-LocalPageKey','self["LocalPageKeySize"]'),
        ('LocalPageKey',':'),
        ('EntryData',':'),
    )

    def __init__(self, flags, data):
        if flags & TAG_COMMON > 0:
            # Include the common header
            self.structure = self.common + self.structure
        Structure.__init__(self,data)

class ESENT_SPACE_TREE_HEADER(Structure):
    structure = (
        ('Unknown','<Q=0'),
    )

class ESENT_SPACE_TREE_ENTRY(Structure):
    structure = (
        ('PageKeySize','<H=0'),
        ('LastPageNumber','<L=0'),
        ('NumberOfPages','<L=0'),
    )

class ESENT_INDEX_ENTRY(Structure):
    structure = (
        ('RecordPageKey',':'),
    )

class ESENT_DATA_DEFINITION_HEADER(Structure):
    structure = (
        ('LastFixedSize','<B=0'),
        ('LastVariableDataType','<B=0'),
        ('VariableSizeOffset','<H=0'),
    )

class ESENT_CATALOG_DATA_DEFINITION_ENTRY(Structure):
    fixed = (
        ('FatherDataPageID','<L=0'),
        ('Type','<H=0'),
        ('Identifier','<L=0'),
    )

    column_stuff = (
        ('ColumnType','<L=0'),
        ('SpaceUsage','<L=0'),
        ('ColumnFlags','<L=0'),
        ('CodePage','<L=0'),
    )

    other = (
        ('FatherDataPageNumber','<L=0'),
    )

    table_stuff = (
        ('SpaceUsage','<L=0'),
    )

    index_stuff = (
        ('SpaceUsage','<L=0'),
        ('IndexFlags','<L=0'),
        ('Locale','<L=0'),
    )

    lv_stuff = (
        ('SpaceUsage','<L=0'),
    )

    common = (
        ('Trailing',':'),
    )

    def __init__(self, data):
        # The layout of the entry depends on the catalog object type, which sits
        # right after the 4-byte FatherDataPageID
        dataType = unpack('<H', data[4:][:2])[0]
        self.structure = self.fixed

        if dataType == CATALOG_TYPE_TABLE:
            self.structure += self.other + self.table_stuff
        elif dataType == CATALOG_TYPE_COLUMN:
            self.structure += self.column_stuff
        elif dataType == CATALOG_TYPE_INDEX:
            self.structure += self.other + self.index_stuff
        elif dataType == CATALOG_TYPE_LONG_VALUE:
            self.structure += self.other + self.lv_stuff
        elif dataType == CATALOG_TYPE_CALLBACK:
            raise Exception('CallBack types not supported!')
        else:
            LOG.error('Unknown catalog type 0x%x' % dataType)
            self.structure = ()

        self.structure += self.common

        Structure.__init__(self,data)

# Legacy, commented-out Python 2 hexdump helpers, superseded by
# impacket.structure.hexdump
#def pretty_print(x):
#    if x in '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ ':
#        return x
#    else:
#        return '.'
#
#def hexdump(data):
#    x = str(data)
#    strLen = len(x)
#    i = 0
#    while i < strLen:
#        print "%04x " % i,
#        for j in range(16):
#            if i+j < strLen:
#                print "%02X" % ord(x[i+j]),
#            else:
#                print "  ",
#            if j%16 == 7:
#                print "",
#        print " ",
#        print ''.join(pretty_print(x) for x in x[i:i+16] )
#        i += 16
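# getUnixTime() below converts a Windows FILETIME value (100-nanosecond intervals
# since 1601-01-01 UTC, which is how ESE stores timestamps) into seconds since the
# Unix epoch: 116444736000000000 is the number of 100-ns intervals between
# 1601-01-01 and 1970-01-01. For example, getUnixTime(116444736000000000) == 0 and
# getUnixTime(116444736000000000 + 10 * 10000000) == 10 (ten seconds past the epoch).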
def getUnixTime(t):
    t -= 116444736000000000
    t //= 10000000
    return t

class ESENT_PAGE:
    def __init__(self, db, data=None):
        self.__DBHeader = db
        self.data = data
        self.record = None
        if data is not None:
            self.record = ESENT_PAGE_HEADER(self.__DBHeader['Version'], self.__DBHeader['FileFormatRevision'],
                                            self.__DBHeader['PageSize'], data)

    def printFlags(self):
        flags = self.record['PageFlags']
        if flags & FLAGS_EMPTY:
            print("\tEmpty")
        if flags & FLAGS_INDEX:
            print("\tIndex")
        if flags & FLAGS_LEAF:
            print("\tLeaf")
        else:
            print("\tBranch")
        if flags & FLAGS_LONG_VALUE:
            print("\tLong Value")
        if flags & FLAGS_NEW_CHECKSUM:
            print("\tNew Checksum")
        if flags & FLAGS_NEW_FORMAT:
            print("\tNew Format")
        if flags & FLAGS_PARENT:
            print("\tParent")
        if flags & FLAGS_ROOT:
            print("\tRoot")
        if flags & FLAGS_SPACE_TREE:
            print("\tSpace Tree")

    def dump(self):
        baseOffset = len(self.record)
        self.record.dump()
        tags = self.data[-4*self.record['FirstAvailablePageTag']:]

        print("FLAGS: ")
        self.printFlags()

        print()

        for i in range(self.record['FirstAvailablePageTag']):
            tag = tags[-4:]
            if self.__DBHeader['Version'] == 0x620 and self.__DBHeader['FileFormatRevision'] > 11 and self.__DBHeader['PageSize'] > 8192:
                valueSize = unpack('<H', tag[:2])[0] & 0x7fff
                valueOffset = unpack('<H', tag[2:])[0] & 0x7fff
                hexdump(self.data[baseOffset+valueOffset:][:valueSize])
                pageFlags = ord(self.data[baseOffset+valueOffset:][1:2]) >> 5
                #print "TAG FLAG: 0x%x " % (unpack('<L', self.data[baseOffset+valueOffset:][:4])[0] >> 5)
                #print "TAG FLAG: 0x " , ord(self.data[baseOffset+valueOffset:][0])
            else:
                valueSize = unpack('<H', tag[:2])[0] & 0x1fff
                pageFlags = unpack('<H', tag[2:])[0] >> 13
                valueOffset = unpack('<H', tag[2:])[0] & 0x1fff

            print("TAG %-8d offset:0x%-6x flags:0x%-4x valueSize:0x%x" % (i, valueOffset, pageFlags, valueSize))
            #hexdump(self.getTag(i)[1])
            tags = tags[:-4]

        if self.record['PageFlags'] & FLAGS_ROOT > 0:
            rootHeader = ESENT_ROOT_HEADER(self.getTag(0)[1])
            rootHeader.dump()
        elif self.record['PageFlags'] & FLAGS_LEAF == 0:
            # Branch Header
            flags, data = self.getTag(0)
            branchHeader = ESENT_BRANCH_HEADER(data)
            branchHeader.dump()
        else:
            # Leaf Header
            flags, data = self.getTag(0)
            if self.record['PageFlags'] & FLAGS_SPACE_TREE > 0:
                # Space Tree
                spaceTreeHeader = ESENT_SPACE_TREE_HEADER(data)
                spaceTreeHeader.dump()
            else:
                leafHeader = ESENT_LEAF_HEADER(data)
                leafHeader.dump()

        # Print the leaf/branch tags
        for tagNum in range(1, self.record['FirstAvailablePageTag']):
            flags, data = self.getTag(tagNum)
            if self.record['PageFlags'] & FLAGS_LEAF == 0:
                # Branch page
                branchEntry = ESENT_BRANCH_ENTRY(flags, data)
                branchEntry.dump()
            elif self.record['PageFlags'] & FLAGS_LEAF > 0:
                # Leaf page
                if self.record['PageFlags'] & FLAGS_SPACE_TREE > 0:
                    # Space Tree
                    spaceTreeEntry = ESENT_SPACE_TREE_ENTRY(data)
                    #spaceTreeEntry.dump()
                elif self.record['PageFlags'] & FLAGS_INDEX > 0:
                    # Index Entry
                    indexEntry = ESENT_INDEX_ENTRY(data)
                    #indexEntry.dump()
                elif self.record['PageFlags'] & FLAGS_LONG_VALUE > 0:
                    # Long Page Value
                    raise Exception('Long value still not supported')
                else:
                    # Table Value
                    leafEntry = ESENT_LEAF_ENTRY(flags, data)
                    dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(leafEntry['EntryData'])
                    dataDefinitionHeader.dump()
                    catalogEntry = ESENT_CATALOG_DATA_DEFINITION_ENTRY(leafEntry['EntryData'][len(dataDefinitionHeader):])
                    catalogEntry.dump()
                    hexdump(leafEntry['EntryData'])

    def getTag(self, tagNum):
        if self.record['FirstAvailablePageTag'] < tagNum:
            raise Exception('Trying to grab an unknown tag 0x%x' % tagNum)

        tags = self.data[-4*self.record['FirstAvailablePageTag']:]
        baseOffset = len(self.record)
        for i in range(tagNum):
            tags = tags[:-4]

        tag = tags[-4:]

        if self.__DBHeader['Version'] == 0x620 and self.__DBHeader['FileFormatRevision'] >= 17 and self.__DBHeader['PageSize'] > 8192:
            valueSize = unpack('<H', tag[:2])[0] & 0x7fff
            valueOffset = unpack('<H', tag[2:])[0] & 0x7fff
            # In this format the tag flags live in the top bits of the second data
            # byte and have to be masked out. Use a bytearray so the masking works
            # on both Python 2 and Python 3.
            tmpData = bytearray(self.data[baseOffset+valueOffset:][:valueSize])
            pageFlags = tmpData[1] >> 5
            tmpData[1] &= 0x1f
            tagData = bytes(tmpData)
        else:
            valueSize = unpack('<H', tag[:2])[0] & 0x1fff
            pageFlags = unpack('<H', tag[2:])[0] >> 13
            valueOffset = unpack('<H', tag[2:])[0] & 0x1fff
            tagData = self.data[baseOffset+valueOffset:][:valueSize]

        #return pageFlags, self.data[baseOffset+valueOffset:][:valueSize]
        return pageFlags, tagData

# Table cursor template used by ESENT_DB.openTable()/getNextRow()
TABLE_CURSOR = {
    'TableData'           : b'',
    'FatherDataPageNumber': 0,
    'CurrentPageData'     : b'',
    'CurrentTag'          : 0,
}

class ESENT_DB:
    def __init__(self, fileName, pageSize=8192, isRemote=False):
        self.__fileName = fileName
        self.__pageSize = pageSize
        self.__DB = None
        self.__DBHeader = None
        self.__totalPages = None
        self.__tables = OrderedDict()
        self.__currentTable = None
        self.__isRemote = isRemote
        self.mountDB()

    def mountDB(self):
        LOG.debug("Mounting DB...")
        if self.__isRemote is True:
            self.__DB = self.__fileName
            self.__DB.open()
        else:
            self.__DB = open(self.__fileName, "rb")
        mainHeader = self.getPage(-1)
        self.__DBHeader = ESENT_DB_HEADER(mainHeader)
        self.__pageSize = self.__DBHeader['PageSize']
        self.__DB.seek(0, 2)
        self.__totalPages = (self.__DB.tell() // self.__pageSize) - 2
        LOG.debug("Database Version:0x%x, Revision:0x%x" % (self.__DBHeader['Version'], self.__DBHeader['FileFormatRevision']))
        LOG.debug("Page Size: %d" % self.__pageSize)
        LOG.debug("Total Pages in file: %d" % self.__totalPages)
        self.parseCatalog(CATALOG_PAGE_NUMBER)

    def __addItem(self, entry):
        dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(entry['EntryData'])
        catalogEntry = ESENT_CATALOG_DATA_DEFINITION_ENTRY(entry['EntryData'][len(dataDefinitionHeader):])
        itemName = self.__parseItemName(entry)

        if catalogEntry['Type'] == CATALOG_TYPE_TABLE:
            # Table
            tableEntry = OrderedDict()
            tableEntry['TableEntry'] = entry
            tableEntry['Columns']    = OrderedDict()
            tableEntry['Indexes']    = OrderedDict()
            tableEntry['LongValues'] = OrderedDict()
            self.__tables[itemName] = tableEntry
            self.__currentTable = itemName
        elif catalogEntry['Type'] == CATALOG_TYPE_COLUMN:
            # Column
            columnEntry = OrderedDict()
            columnEntry['Header'] = dataDefinitionHeader
            columnEntry['Record'] = catalogEntry
            self.__tables[self.__currentTable]['Columns'][itemName] = columnEntry
        elif catalogEntry['Type'] == CATALOG_TYPE_INDEX:
            # Index
            indexEntry = OrderedDict()
            indexEntry['Header'] = dataDefinitionHeader
            indexEntry['Record'] = catalogEntry
            self.__tables[self.__currentTable]['Indexes'][itemName] = indexEntry
        elif catalogEntry['Type'] == CATALOG_TYPE_LONG_VALUE:
            # Long Value
            longValueEntry = OrderedDict()
            longValueEntry['Header'] = dataDefinitionHeader
            longValueEntry['Record'] = catalogEntry
            self.__tables[self.__currentTable]['LongValues'][itemName] = longValueEntry
        else:
            raise Exception('Unknown catalog type 0x%x' % catalogEntry['Type'])

    def __parseItemName(self, entry):
        dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(entry['EntryData'])

        if dataDefinitionHeader['LastVariableDataType'] > 127:
            numEntries = dataDefinitionHeader['LastVariableDataType'] - 127
        else:
            numEntries = dataDefinitionHeader['LastVariableDataType']

        itemLen = unpack('<H', entry['EntryData'][dataDefinitionHeader['VariableSizeOffset']:][:2])[0]
        itemName = entry['EntryData'][dataDefinitionHeader['VariableSizeOffset']:][2*numEntries:][:itemLen]

        return itemName

    def parsePage(self, page):
        # Process the leaf tags of a catalog page and register their items
        for tagNum in range(1, page.record['FirstAvailablePageTag']):
            flags, data = page.getTag(tagNum)
            if page.record['PageFlags'] & FLAGS_LEAF > 0:
                # Leaf page
                if page.record['PageFlags'] & FLAGS_SPACE_TREE > 0:
                    pass
                elif page.record['PageFlags'] & FLAGS_INDEX > 0:
                    pass
                elif page.record['PageFlags'] & FLAGS_LONG_VALUE > 0:
                    pass
                else:
                    # Table Value
                    leafEntry = ESENT_LEAF_ENTRY(flags, data)
                    self.__addItem(leafEntry)

    def parseCatalog(self, pageNum):
        # Parse all the pages starting at pageNum and commit table data
        page = self.getPage(pageNum)
        self.parsePage(page)

        for i in range(1, page.record['FirstAvailablePageTag']):
            flags, data = page.getTag(i)
            if page.record['PageFlags'] & FLAGS_LEAF == 0:
                # Branch page
                branchEntry = ESENT_BRANCH_ENTRY(flags, data)
                self.parseCatalog(branchEntry['ChildPageNumber'])

    def readHeader(self):
        LOG.debug("Reading Boot Sector for %s" % self.__volumeName)

    def getPage(self, pageNum):
        LOG.debug("Trying to fetch page %d (0x%x)" % (pageNum, (pageNum+1)*self.__pageSize))
        self.__DB.seek((pageNum+1)*self.__pageSize, 0)
        data = self.__DB.read(self.__pageSize)
        while len(data) < self.__pageSize:
            remaining = self.__pageSize - len(data)
            data += self.__DB.read(remaining)
        # Special case for the first page
        if pageNum <= 0:
            return data
        else:
            return ESENT_PAGE(self.__DBHeader, data)

    def close(self):
        self.__DB.close()

    def openTable(self, tableName):
        # Returns a cursor for later use

        if isinstance(tableName, bytes) is not True:
            tableName = b(tableName)

        if tableName in self.__tables:
            entry = self.__tables[tableName]['TableEntry']
            dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(entry['EntryData'])
            catalogEntry = ESENT_CATALOG_DATA_DEFINITION_ENTRY(entry['EntryData'][len(dataDefinitionHeader):])

            # Let's position the cursor at the leaf levels for fast reading
            pageNum = catalogEntry['FatherDataPageNumber']
            done = False
            while done is False:
                page = self.getPage(pageNum)
                if page.record['FirstAvailablePageTag'] <= 1:
                    # There are no records
                    done = True
                for i in range(1, page.record['FirstAvailablePageTag']):
                    flags, data = page.getTag(i)
                    if page.record['PageFlags'] & FLAGS_LEAF == 0:
                        # Branch page, move on to the next page
                        branchEntry = ESENT_BRANCH_ENTRY(flags, data)
                        pageNum = branchEntry['ChildPageNumber']
                        break
                    else:
                        done = True
                        break

            cursor = TABLE_CURSOR
            cursor['TableData'] = self.__tables[tableName]
            cursor['FatherDataPageNumber'] = catalogEntry['FatherDataPageNumber']
            cursor['CurrentPageData'] = page
            cursor['CurrentTag'] = 0
            return cursor

        else:
            return None

    def __getNextTag(self, cursor):
        page = cursor['CurrentPageData']

        if cursor['CurrentTag'] >= page.record['FirstAvailablePageTag']:
            # No more data in this page, chau
            return None

        flags, data = page.getTag(cursor['CurrentTag'])
        if page.record['PageFlags'] & FLAGS_LEAF > 0:
            # Leaf page
            if page.record['PageFlags'] & FLAGS_SPACE_TREE > 0:
                raise Exception('FLAGS_SPACE_TREE > 0')
            elif page.record['PageFlags'] & FLAGS_INDEX > 0:
                raise Exception('FLAGS_INDEX > 0')
            elif page.record['PageFlags'] & FLAGS_LONG_VALUE > 0:
                raise Exception('FLAGS_LONG_VALUE > 0')
            else:
                # Table Value
                leafEntry = ESENT_LEAF_ENTRY(flags, data)
                return leafEntry

        return None
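    # getNextRow() drives sequential reads over a table: each call advances
    # cursor['CurrentTag'] to the next tag of the current leaf page and, once the
    # page is exhausted, follows the page header's NextPageNumber link to the next
    # leaf page. It returns an OrderedDict mapping column names to decoded values
    # (built by __tagToRecord below), or None once the last leaf page is consumed.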
    def getNextRow(self, cursor):
        cursor['CurrentTag'] += 1

        tag = self.__getNextTag(cursor)
        #hexdump(tag)

        if tag is None:
            # No more tags in this page, search for the next one on the right
            page = cursor['CurrentPageData']
            if page.record['NextPageNumber'] == 0:
                # No more pages, chau
                return None
            else:
                cursor['CurrentPageData'] = self.getPage(page.record['NextPageNumber'])
                cursor['CurrentTag'] = 0
                return self.getNextRow(cursor)
        else:
            return self.__tagToRecord(cursor, tag['EntryData'])

    def __tagToRecord(self, cursor, tag):
        # So my brain doesn't forget, the data record is composed of:
        # Header
        # Fixed Size Data (ID < 127)
        #     The easiest to parse. Their size is fixed in the record. You can get the size
        #     from the Column Record, field SpaceUsage
        # Variable Size Data (127 < ID < 255)
        #     At VariableSizeOffset you get an array of two bytes per variable entry, pointing
        #     to the length of the value. Values start at:
        #         numEntries = LastVariableDataType - 127
        #         VariableSizeOffset + numEntries * 2 (bytes)
        # Tagged Data ( > 255 )
        #     After the Variable Size Values, there's more data for the tagged values.
        #     Right at the beginning there's another array (taggedItems), pointing to the
        #     values and their sizes.
        #
        # The interesting thing about these DB records is that not every column needs to be
        # present, which saves space. That's why I go over all the columns and, if I find
        # data (of any type), I assign it; if not, the column is left empty.
        #
        # There are a lot of caveats in the code, so take your time to explore it.
        #
        # ToDo: Better complete this description
        #
        record = OrderedDict()
        taggedItems = OrderedDict()
        taggedItemsParsed = False

        dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(tag)
        #dataDefinitionHeader.dump()
        variableDataBytesProcessed = (dataDefinitionHeader['LastVariableDataType'] - 127) * 2
        prevItemLen = 0
        tagLen = len(tag)
        fixedSizeOffset = len(dataDefinitionHeader)
        variableSizeOffset = dataDefinitionHeader['VariableSizeOffset']

        columns = cursor['TableData']['Columns']

        for column in list(columns.keys()):
            columnRecord = columns[column]['Record']
            #columnRecord.dump()
            if columnRecord['Identifier'] <= dataDefinitionHeader['LastFixedSize']:
                # Fixed Size column data type, still available data
                record[column] = tag[fixedSizeOffset:][:columnRecord['SpaceUsage']]
                fixedSizeOffset += columnRecord['SpaceUsage']

            elif 127 < columnRecord['Identifier'] <= dataDefinitionHeader['LastVariableDataType']:
                # Variable data type
                index = columnRecord['Identifier'] - 127 - 1
                itemLen = unpack('<H', tag[variableSizeOffset+index*2:][:2])[0]

                if itemLen & 0x8000:
                    # Empty item
                    itemLen = prevItemLen
                    record[column] = None
                else:
                    itemValue = tag[variableSizeOffset+variableDataBytesProcessed:][:itemLen-prevItemLen]
                    record[column] = itemValue

                variableDataBytesProcessed += itemLen - prevItemLen
                prevItemLen = itemLen

            elif columnRecord['Identifier'] > 255:
                # Have we parsed the tagged items already?
                if taggedItemsParsed is False and (variableDataBytesProcessed+variableSizeOffset) < tagLen:
                    index = variableDataBytesProcessed+variableSizeOffset
                    #hexdump(tag[index:])
                    endOfVS = self.__pageSize
                    firstOffsetTag = (unpack('<H', tag[index+2:][:2])[0] & 0x3fff) + variableDataBytesProcessed + variableSizeOffset
                    while True:
                        taggedIdentifier = unpack('<H', tag[index:][:2])[0]
                        index += 2
                        taggedOffset = (unpack('<H', tag[index:][:2])[0] & 0x3fff)
                        # As of Windows 7 and later (version 0x620 revision 0x11, with large
                        # pages) the tagged data type flags are always present
                        if self.__DBHeader['Version'] == 0x620 and self.__DBHeader['FileFormatRevision'] >= 17 and self.__DBHeader['PageSize'] > 8192:
                            flagsPresent = 1
                        else:
                            flagsPresent = (unpack('<H', tag[index:][:2])[0] & 0x4000)
                        index += 2
                        if taggedOffset < endOfVS:
                            endOfVS = taggedOffset
                        taggedItems[taggedIdentifier] = (taggedOffset, tagLen, flagsPresent)
                        #print "ID: %d, Offset: %d, firstOffset: %d, flagsPresent: 0x%x" % (taggedIdentifier, taggedOffset, firstOffsetTag, flagsPresent)
                        if index >= firstOffsetTag:
                            # We reached the end of the variable size array
                            break

                    # Calculate length of variable items
                    # Ugly.. should be redone
                    prevKey = list(taggedItems.keys())[0]
                    for i in range(1, len(taggedItems)):
                        offset0, length, flags = taggedItems[prevKey]
                        offset, _, _ = list(taggedItems.items())[i][1]
                        taggedItems[prevKey] = (offset0, offset-offset0, flags)
                        #print "ID: %d, Offset: %d, Len: %d, flags: %d" % (prevKey, offset0, offset-offset0, flags)
                        prevKey = list(taggedItems.keys())[i]
                    taggedItemsParsed = True
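                # Each taggedItems entry is keyed by the column identifier and holds a
                # (dataOffset, dataLength, flagsPresent) tuple. dataOffset is relative to
                # the start of the tagged data area (variableDataBytesProcessed +
                # variableSizeOffset); when flagsPresent is non-zero the first byte of the
                # item is a TAGGED_DATA_TYPE_* flag byte and is stripped below before the
                # value itself is returned.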
                # Tagged data type
                if columnRecord['Identifier'] in taggedItems:
                    offsetItem = variableDataBytesProcessed + variableSizeOffset + taggedItems[columnRecord['Identifier']][0]
                    itemSize = taggedItems[columnRecord['Identifier']][1]
                    # If the item has flags, we should skip them
                    if taggedItems[columnRecord['Identifier']][2] > 0:
                        itemFlag = ord(tag[offsetItem:offsetItem+1])
                        offsetItem += 1
                        itemSize -= 1
                    else:
                        itemFlag = 0

                    #print "ID: %d, itemFlag: 0x%x" % (columnRecord['Identifier'], itemFlag)
                    if itemFlag & TAGGED_DATA_TYPE_COMPRESSED:
                        LOG.error('Unsupported tag column: %s, flag:0x%x' % (column, itemFlag))
                        record[column] = None
                    elif itemFlag & TAGGED_DATA_TYPE_MULTI_VALUE:
                        # ToDo: Parse multi-values properly
                        LOG.debug('Multivalue detected in column %s, returning raw results' % column)
                        record[column] = (hexlify(tag[offsetItem:][:itemSize]),)
                    else:
                        record[column] = tag[offsetItem:][:itemSize]

                else:
                    record[column] = None
            else:
                record[column] = None

            # If we understand the data type, we unpack it and cast it accordingly;
            # otherwise, we just encode it in hex
            if type(record[column]) is tuple:
                # Multi-value data, we won't decode it, just leave it this way
                record[column] = record[column][0]
            elif columnRecord['ColumnType'] == JET_coltypText or columnRecord['ColumnType'] == JET_coltypLongText:
                # Let's handle strings
                if record[column] is not None:
                    if columnRecord['CodePage'] not in StringCodePages:
                        raise Exception('Unknown codepage 0x%x' % columnRecord['CodePage'])
                    stringDecoder = StringCodePages[columnRecord['CodePage']]
                    try:
                        record[column] = record[column].decode(stringDecoder)
                    except Exception:
                        LOG.debug("Exception:", exc_info=True)
                        LOG.debug('Fixing Record[%r][%d]: %r' % (column, columnRecord['ColumnType'], record[column]))
                        record[column] = record[column].decode(stringDecoder, "replace")
                        pass
            else:
                unpackData = ColumnTypeSize[columnRecord['ColumnType']]
                if record[column] is not None:
                    if unpackData is None:
                        record[column] = hexlify(record[column])
                    else:
                        unpackStr = unpackData[1]
                        record[column] = unpack(unpackStr, record[column])[0]

        return record
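# A minimal usage sketch, assuming a local copy of an ESE database (for example an
# offline NTDS.dit) is passed on the command line and that it contains the standard
# 'datatable' table; both the argument handling and the table name are illustrative
# only and not part of the parser itself.
if __name__ == '__main__':
    import sys

    if len(sys.argv) != 2:
        print("Usage: %s <databaseFile>" % sys.argv[0])
        sys.exit(1)

    db = ESENT_DB(sys.argv[1])
    cursor = db.openTable('datatable')
    if cursor is None:
        print("Table 'datatable' not found in %s" % sys.argv[1])
    else:
        rows = 0
        while True:
            row = db.getNextRow(cursor)
            if row is None:
                break
            rows += 1
        print("datatable contains %d rows" % rows)
    db.close()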