Searching for a byte sequence in huge files
I needed a script that searches for a sequence of bytes in a huge file, so I wrote one.
Maybe this helps somebody else as well.
import os
# simple search for some bytes in a huge file
def bgrep(filename, search_term, chunk_size=1024*1024):
    """Return the file offset of the first occurrence of search_term.

    The file is scanned in steps of ``chunk_size`` bytes so that it never has
    to be loaded into memory as a whole.  Every read is extended by
    ``len(search_term) - 1`` bytes, so a match that straddles the boundary
    between two steps is still seen in full.

    Args:
        filename: Path of the file to search; it is opened in binary mode.
        search_term: Bytes-like sequence to look for.
        chunk_size: Number of bytes the scan advances per read (must be >= 1).

    Returns:
        The offset of the first match, counted in bytes from the start of the
        file, or -1 if search_term does not occur.

    Raises:
        ValueError: If chunk_size is smaller than 1.
    """
    if chunk_size < 1:
        # A non-positive step would never advance through the file.
        raise ValueError("chunk_size must be at least 1")

    # Extra bytes read past each step so boundary-spanning matches are found.
    # Clamped at 0 so an empty search_term does not shrink the read.
    overlap = max(len(search_term) - 1, 0)

    # The with-statement closes the file on every exit path, including the
    # early return below.
    with open(filename, 'rb') as disk_image:
        file_pointer = 0
        while True:
            # Re-position explicitly: the previous read went `overlap` bytes
            # past the start of this step.
            disk_image.seek(file_pointer)
            chunk = disk_image.read(chunk_size + overlap)
            if not chunk:
                # Nothing left to read: the end of the file was reached.
                break
            # find returns -1 if the term is not in this chunk.
            position = chunk.find(search_term)
            if position != -1:
                # `position` is relative to the chunk; add the chunk's start
                # to report the offset within the whole file.
                return file_pointer + position
            file_pointer += chunk_size
    return -1
if __name__ == '__main__':
    # Demo run: search the disk image 'fat.dd.dmg' for the bytes b'php' and
    # print what bgrep returns (-1 when the bytes are not found).  Guarded so
    # that importing this module does not trigger the file search.
    position = bgrep('fat.dd.dmg', b'php')
    print(position)