原始backup数据的提取
- 一直觉得微信聊天记录备份是一个麻烦事情,最近发现微信占用的空间越来越大,又舍不得删除,想着能够实现自动化备份微信聊天记录等信息。在互联网上看到有可以备份微信聊天记录的工具,但是是收费的。我觉得既然别人能备份,我们自己也应该可以试着做到。
- 用 Google 搜索了一下,发现可以通过 iTunes 来备份手机数据。备份后,数据会保存在 ~/Library/Application Support/MobileSync/Backup 目录下面。在 Finder 中按 shift+command+g,然后输入该地址,即可直接跳转到该目录。该目录下有若干备份文件夹,随便选一个进入,里面有:Info.plist、Manifest.mbdb、Manifest.plist、Status.plist 等文件。
- 通过对这些数据进行分析,觉得Manifest.mbdb应该是一个数据库一样的文件,存储着各种信息,所以查了下怎么解析这个文件。
- 通过对Manifest.mbdb解析,看到了很多的文件以及对应的目录。这些应该就是备份的文件列表。然后试着对这些列表进行还原。通过Manifest.mbdb可以得到当前目录里面的这些文件对应关系。然后根据这些文件的信息,进行还原。
提取数据的Python脚本
#!/usr/bin/env python
"""Restore files from an iTunes backup indexed by ``Manifest.mbdb``.

Backups are stored under ~/Library/Application Support/MobileSync/Backup.
Each backed-up file is stored under a name equal to the SHA-1 hex digest of
``"<domain>-<relative path>"``; ``Manifest.mbdb`` lists the domain and path of
every entry.  This script parses the manifest, copies every recognised file
to its original relative path below the output directory, and additionally
copies every file of the backup directory into ``other-data/`` with an
extension guessed from the first bytes of its content.

The code runs unchanged on Python 2 and Python 3.

Usage:
    python script.py <backup directory> <output directory>
"""

import hashlib
import os
import shutil
import sys

# Record offset inside Manifest.mbdb -> file ID (SHA-1 hex digest).
# Filled in by process_mbdb_file().
mbdx = {}

# File ID -> relative path of the file inside its domain.
# Filled in by fileinfo_str().
fileid_to_path = {}

# Fixed-width big-endian integer fields that follow the five strings of a
# record, in on-disk order, as (key, size in bytes).
_INT_FIELDS = (
    ("mode", 2),
    ("unknown2", 4),
    ("unknown3", 4),
    ("userid", 4),
    ("groupid", 4),
    ("mtime", 4),
    ("atime", 4),
    ("ctime", 4),
    ("filelen", 8),
    ("flag", 1),
    ("numprops", 1),
)

# (mode & 0xE000) -> one-character type marker used in verbose listings.
_TYPE_CHARS = {
    0xA000: "l",  # symlink
    0x8000: "-",  # file
    0x4000: "d",  # dir
}

# Content markers looked for in the first bytes of a file, and the extension
# each one implies.  Order matters: a later match overrides an earlier one.
_SIGNATURES = (
    ((b"bplist", b"<?xml"), ".plist"),
    ((b"SQLite",), ".sqlitedb"),
    ((b"JFIF", b"Exif"), ".jpeg"),
    ((b"PNG",), ".png"),
    ((b"cook",), ".binarycookies"),
    ((b"ftypqt",), ".mov"),
    ((b"ID3",), ".mp3"),
)


def _text(raw):
    """Return *raw* as the native ``str`` type of the running interpreter.

    On Python 2 byte strings already are ``str`` and are returned untouched;
    on Python 3 ``bytes`` are decoded as UTF-8 with undecodable bytes replaced.
    """
    if isinstance(raw, str):
        return raw
    return raw.decode("utf-8", "replace")


def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    Args:
        data: the raw manifest content (a byte string).
        offset: index of the first byte of the integer.
        intsize: width of the integer in bytes.

    Returns:
        ``(value, new_offset)`` where ``new_offset == offset + intsize``.

    Raises:
        IndexError: if fewer than *intsize* bytes are available at *offset*.
    """
    end = offset + intsize
    if end > len(data):
        raise IndexError(
            "truncated data: need %d byte(s) at offset %d" % (intsize, offset)
        )
    value = 0
    # bytearray yields integers on both Python 2 and Python 3.
    for byte in bytearray(data[offset:end]):
        value = (value << 8) | byte
    return value, end


def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    A string is stored as a 2-byte big-endian length followed by that many
    bytes; the two bytes ``FF FF`` stand for a blank string.

    Returns:
        ``(raw_bytes, new_offset)``.

    Raises:
        IndexError: if the data ends before the string does.
    """
    if data[offset:offset + 2] == b"\xff\xff":
        return b"", offset + 2  # Blank string
    length, offset = getint(data, offset, 2)  # 2-byte length
    end = offset + length
    if end > len(data):
        raise IndexError(
            "truncated data: need %d byte(s) at offset %d" % (length, offset)
        )
    return data[offset:end], end


def process_mbdb_file(filename):
    """Parse a ``Manifest.mbdb`` file.

    As a side effect the module-level ``mbdx`` mapping receives, for every
    record, the SHA-1 hex digest of ``"<domain>-<filename>"`` keyed by the
    record's start offset.

    Args:
        filename: path of the manifest file.

    Returns:
        A dict mapping each record's start offset to a dict of its fields.
        ``domain``, ``filename``, ``linktarget`` and the property names are
        native strings; ``datahash``, ``unknown1`` and the property values are
        left as raw bytes.

    Raises:
        ValueError: if the file does not start with the ``mbdb`` magic.
        IndexError: if the file is truncated in the middle of a record.
    """
    # The manifest is binary: read it as bytes and close the handle promptly.
    with open(filename, "rb") as handle:
        data = handle.read()
    if data[0:4] != b"mbdb":
        raise ValueError("This does not look like an MBDB file")

    mbdb = {}  # Map offset of info in this file => file info
    offset = 4 + 2  # magic, then the bytes x05 x00 (meaning not known)
    while offset < len(data):
        fileinfo = {"start_offset": offset}
        domain_raw, offset = getstring(data, offset)
        path_raw, offset = getstring(data, offset)
        link_raw, offset = getstring(data, offset)
        fileinfo["domain"] = _text(domain_raw)
        fileinfo["filename"] = _text(path_raw)
        fileinfo["linktarget"] = _text(link_raw)
        fileinfo["datahash"], offset = getstring(data, offset)
        fileinfo["unknown1"], offset = getstring(data, offset)
        for key, size in _INT_FIELDS:
            fileinfo[key], offset = getint(data, offset, size)

        properties = {}
        for _ in range(fileinfo["numprops"]):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[_text(propname)] = propval
        fileinfo["properties"] = properties

        mbdb[fileinfo["start_offset"]] = fileinfo
        # Hash the raw bytes so the ID matches the on-disk file name exactly.
        digest = hashlib.sha1(domain_raw + b"-" + path_raw)
        mbdx[fileinfo["start_offset"]] = digest.hexdigest()
    return mbdb


def modestr(val):
    """Render the low nine permission bits of *val* as ``rwxrwxrwx`` text."""
    chars = []
    for shift in (6, 3, 0):  # owner, group, other
        bits = val >> shift
        chars.append("r" if bits & 0x4 else "-")
        chars.append("w" if bits & 0x2 else "-")
        chars.append("x" if bits & 0x1 else "-")
    return "".join(chars)


def fileinfo_str(f, verbose=False):
    """Format one manifest record for display.

    As a side effect the record is registered in the module-level
    ``fileid_to_path`` mapping (file ID -> filename); the restore step relies
    on that mapping.

    Args:
        f: a record dict as produced by process_mbdb_file(), with an added
            ``"fileID"`` key.
        verbose: when true, return an ``ls -l``-like line instead of the short
            ``"<id> => <filename> (<domain>)"`` form.

    Returns:
        The formatted string.
    """
    fileid_to_path[f["fileID"]] = f["filename"]
    if not verbose:
        return "%s => %s (%s)" % (f["fileID"], f["filename"], f["domain"])

    kind = _TYPE_CHARS.get(f["mode"] & 0xE000)
    if kind is None:
        sys.stderr.write(
            "Unknown file type %04x for %s\n"
            % (f["mode"], fileinfo_str(f, False))
        )
        kind = "?"  # unknown

    info = "%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % (
        kind,
        modestr(f["mode"] & 0x0FFF),
        f["userid"],
        f["groupid"],
        f["filelen"],
        f["mtime"],
        f["atime"],
        f["ctime"],
        f["fileID"],
        f["domain"],
        f["filename"],
    )
    if kind == "l":
        info = info + " -> " + _text(f["linktarget"])  # symlink destination
    for name, value in f["properties"].items():  # extra properties
        info = info + " " + _text(name) + "=" + repr(value)
    return info


verbose = True


def _target_path(outputpath, relname):
    """Map a manifest path onto *outputpath*.

    Returns None when the path is empty or contains a ``..`` component, so a
    manifest entry can never be written outside the output directory.
    """
    parts = [part for part in relname.split("/") if part not in ("", ".")]
    if not parts or ".." in parts:
        return None
    return os.path.join(outputpath, *parts)


def _guess_extension(head):
    """Guess a file extension from the first bytes of a file ("" if unknown)."""
    extension = ""
    for markers, candidate in _SIGNATURES:
        if any(marker in head for marker in markers):
            extension = candidate  # later table entries take precedence
    return extension


def _unique_path(directory, stem, extension):
    """Return a path in *directory* for stem+extension that does not exist yet."""
    candidate = directory + "/" + stem + extension
    counter = 0
    while os.path.exists(candidate):
        candidate = directory + "/" + stem + str(counter) + extension
        counter += 1
    return candidate


def _restore_known_files(backuppath, outputpath):
    """Copy every backup file listed in the manifest to its original path."""
    for fname in os.listdir(backuppath):
        relname = fileid_to_path.get(fname)
        if relname is None:
            continue  # not a file described by the manifest
        target = _target_path(outputpath, relname)
        if target is None:
            sys.stderr.write(
                "Skipping %s: unusable manifest path %r\n" % (fname, relname)
            )
            continue
        source = backuppath + fname
        target_dir = os.path.dirname(target)
        try:
            if not os.path.exists(target_dir):
                os.makedirs(target_dir)
                print(">>makedirs " + target_dir)
            shutil.copy(source, target)
            print(">>copy from " + source + " to " + target)
        except EnvironmentError as err:
            # Best effort: report the failure and carry on with other files.
            sys.stderr.write("Could not restore %s: %s\n" % (source, err))


def _copy_by_signature(backuppath, outputpath):
    """Copy every backup file into ``other-data/`` with a guessed extension."""
    other_dir = outputpath + "other-data"
    for fname in os.listdir(backuppath):
        source = backuppath + fname
        print("handler " + source)
        if not os.path.isfile(source):
            continue  # directories cannot be opened for sniffing
        try:
            with open(source, "rb") as handle:
                head = handle.read(15)
            if not os.path.exists(other_dir):
                os.makedirs(other_dir)
            target = _unique_path(other_dir, fname, _guess_extension(head))
            shutil.copy(source, target)
            print(">>copy from " + source + " to " + target)
        except EnvironmentError as err:
            # Best effort: report the failure and carry on with other files.
            sys.stderr.write("Could not copy %s: %s\n" % (source, err))


def main(argv=None):
    """Run the restore.

    Args:
        argv: ``[program, backup directory, output directory]``; defaults to
            ``sys.argv``.

    Returns:
        0 on success, 1 on a usage error or a missing directory.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 3:
        print("\nUsage: Python iOS-Corrupted-Backup-Reader.py [Full path to backup directory] [Full path to output directory]\n")
        # Backslashes are escaped: "\b" would otherwise be a backspace.
        print("Example: Python iOS-Corrupted-Backup-Reader.py c:\\backup c:\\output")
        return 1

    backuppath = argv[1]
    outputpath = argv[2]
    if not os.path.exists(backuppath):
        print("Backup directory not found.")
        return 1
    if not os.path.exists(outputpath):
        print("Output directory not found. Create the directory before running the script.")
        return 1

    # Paths are built by concatenation below, so make sure both end in "/".
    if not backuppath.endswith("/"):
        backuppath = backuppath + "/"
    if not outputpath.endswith("/"):
        outputpath = outputpath + "/"

    mbdb = process_mbdb_file(backuppath + "Manifest.mbdb")
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo["fileID"] = mbdx[offset]
        else:
            fileinfo["fileID"] = "<nofileID>"
            sys.stderr.write(
                "No fileID found for %s\n" % fileinfo_str(fileinfo)
            )
        print(fileinfo_str(fileinfo))

    _restore_known_files(backuppath, outputpath)
    _copy_by_signature(backuppath, outputpath)
    print("Files successfully moved to" + outputpath)
    return 0


if __name__ == "__main__":
    sys.exit(main())
说明:
手机备份的文件所在路径:
~/Library/Application Support/MobileSync/Backup
执行样例:
python ReadiPhoneBackupData.py "$HOME/Library/Application Support/MobileSync/Backup/xxx" /Users/xxx/Desktop/xxx