Cugu's blog

IT security & forensics

RSS · Twitter · GitHub

Binary search in huge files

I needed a script that searches for a sequence of bytes in a huge file. So I wrote one.

Maybe this helps somebody else as well.

    import os

    # simple search for some bytes in a huge file
    def bgrep(filename, search_term, chunk_size=1024*1024):
        """Return the file offset of the first occurrence of search_term.

        The file is scanned window by window so that it never has to be
        loaded into memory as a whole.

        Args:
            filename: path of the file to search.
            search_term: the bytes sequence to look for.
            chunk_size: number of bytes the scan window advances per step;
                must be positive.

        Returns:
            The offset, counted from the beginning of the file, at which the
            first match starts, or -1 if search_term does not occur.

        Raises:
            ValueError: if chunk_size is not positive.
        """
        if chunk_size < 1:
            # a zero step would never advance and loop forever
            raise ValueError('chunk_size must be a positive integer')

        # Each window is extended by len(search_term) - 1 bytes so that a
        # match straddling two consecutive chunks is still seen in one piece.
        read_length = chunk_size + len(search_term) - 1

        # open file as binary; the with-statement closes it on every exit path
        with open(filename, 'rb') as disk_image:

            # iterate through file chunkwise
            file_pointer = 0
            file_size = os.path.getsize(filename)
            while file_pointer < file_size:

                # set file pointer to beginning of chunk
                disk_image.seek(file_pointer)

                # read chunk plus the overlap into the next chunk
                chunk = disk_image.read(read_length)

                # search bytes in chunk; find returns -1 if not found
                position = chunk.find(search_term)
                if position != -1:
                    # position is relative to the chunk, so add the chunk's
                    # start to obtain the offset within the file
                    return file_pointer + position

                # increment file pointer to the start of the next chunk
                file_pointer += chunk_size

        return -1

    # example: look for the bytes b'php' in a disk image and print the
    # value bgrep returns (-1 means the bytes were not found)
    position = bgrep(
        filename='fat.dd.dmg',
        search_term=b'php',
    )
    print(position)