#!/usr/bin/env python # Copyright Paul Sladen , 2005-05-14 # You may use this work under the terms of the GNU GPL. # # Synopsis: # 1. call dumpe2fs /dev/xxxx | grep -E '^( Free blocks: |Block size:)' # 2. decode Block size, eg. 4096 bytes # 3. decode ranges of Free blocks, like: 123, 132-145, 149-150, 167 # 4. open '/dev/xxxx' for writing # 5. seek to each location (block_number * block_size) and write lots of NUL # 6. profit """\ e2fszero 0.1 (2005-05-14) Usage: e2fs-zero [-h] [-v] [-w|-n] ext2-filesystem Zero unused blocks in an Ext2 Filesystem, to increase compression and rsyncability. -h --help this message -v --verbose extra information -n --dryrun disable writing to the filesystem -w --write enable writing to the filesystem (default) Note: This program relies on 'dumpe2fs' to do the dangerous calculations! NOTE: YES, THIS PROGRAM REALLY WILL OVERWRITE (bits of) YOUR FILESYSTEM WITH NULLS\ """ DUMPE2FS = '/sbin/dumpe2fs' import os, sys # messages verbose = False # enable writing operations dangerous = False def main(): global verbose, dangerous, DUMPE2FS # catch people who need usage help # this is the worst and more incorrect piece of code in here leftover = [] for fight in sys.argv[1:]: if fight == '-v' or fight == '--verbose': verbose = True continue elif fight == '-n' or fight == '--dryrun': dangerous = False continue elif fight == '-w' or fight == '--write': dangerous = True continue elif fight[0] == '-': print __doc__ sys.exit() leftover.append(fight) #print `leftover` try: if len(leftover) != 1: raise 'ArgumentError' filesystem = leftover[0] if len(filesystem) <= 0: raise 'NoFilesystemName' except: print >> sys.stderr, __doc__ sys.exit() # We need access to the filesystem image (either a block device or a very large file) # and we also need to have 'dumpe2fs', otherwise we can't open a pipe() from it. try: stat = os.stat(filesystem) stat = os.stat(DUMPE2FS) # Might aswell just let the user see any stderr errors from dumpe2fs, # although annoying it prints a banner first #out, err = os.popen3("%s '%s'" % (DUMPE2FS, filesystem))[1:] sys.stderr.write('calling ') pipe = os.popen("%s '%s'" % (DUMPE2FS, filesystem)) except OSError: print >> sys.stderr, "$(PROGRAM): can't access $(filesystem), try --help" # We're looking for the following lines from dumpe2fs, in order, and ignoring the rest: # Filesystem volume name: # Free blocks: 134859 # Block size: 4096 # Free blocks: 1123, 1345-1456, 1567, 1678-1789 # Free blocks: 2123-2345, 2456-2567, 2678, 2789 s = pipe.readline() if s <= 'Filesystem volume name:': raise "Failed to parse correct dumpe2fs output" # 'Free blocks:' while not s.startswith('Free blocks:') and len(s) > 0: s = pipe.readline() try: free_blocks = int(s.strip().split(': ')[1]) except: raise "Failed to parse unused block count ('Free blocks:')" if verbose: print "Detected filsystem contains %d free blocks" % (free_blocks) # 'Block size:' while not s.startswith('Block size:') and len(s) > 0: s = pipe.readline() try: block_size = int(s.strip().split(': ')[1]) except: raise "Failed to parse filesystem block-size ('Block size:')" if verbose: print "Detected filsystem block_size = %d bytes" % (block_size) # 'Free blocks:' (multiple entries, one per Ext2 "group") free_ranges = [] while True: while len(s) and not s.startswith(' Free blocks:'): try: s = pipe.readline() except: raise "failed to read" # Detect EOF if not len(s): break #print len(s), `s` # Strip the label: and separate the commas try: #print `s.strip()` free_ranges += s.split(': ', 1)[1].strip().split(', ')[:] except: print >> sys.stderr, `s` raise "Failed to parse free_ranges (' Free blocks:')" s = pipe.readline() #print `free_ranges` # Turn the strings into integer lists of useful free blocks # 'blocks' contains each free blocks and get _very_ big # 'wipes' contains [offset, length] pairs record_blocks = False record_wipes = True blocks = [] wipes = [] free_block_count = 0 for egg in free_ranges: if len(egg) > 0: # Assuming this ext2 group has some spare space in it... try: # Find some ranges (Ranges are inclusive, eg. 172-184) if egg.find('-') > 0: #blocks += range(*map(int, egg.split('-'))) a, b = egg.split('-') if record_blocks: blocks += range(int(a), int(b) + 1) if record_wipes: wipes.append([block_size * int(a), block_size * (int(b) - int(a) + 1)]) free_block_count += int(b) - int(a) + 1 # But some are singular (eg. '199') is just one free block on its own else: if record_blocks: blocks += [int(egg)] if record_wipes: wipes.append([block_size * int(egg), block_size]) free_block_count += 1 except: # since we're nearly at the point of writing to the disk, # it probably better to just safely roll over and die print "Bzzzz on trying to decode " + `egg` blocks.sort() #print len(blocks), `blocks` if verbose: print len(wipes), 'offset/length pairs', `wipes` if verbose or free_blocks != free_block_count: print "Free blocks; parsed: %d, decoded: %d" % (free_blocks, free_block_count) if free_blocks != free_block_count: raise 'Decoded Free blocks do not match count in filesystem!' perform_wipe(filesystem, wipes) WRITE_SIZE = 2**18 PADDING = '\0' # fstream file-access [open/f.write/f.tell] seems to have some # grave funnyiness that causes the file to be randomly truncated. # Since I spent a good while tearing my hair out over this, I've # changed it to just use the normal POSIX os.open/os.write/close # Here we take the offset/length pairs decoded above, open the # ext2 filesystem image and overwrite the unused areas. # it would be handy to truncate areas (make them sparse) so that they # don't actually take up space on disk to... def perform_wipe(filename, wipes = [[0, 0]]): progress_counter = 0.0 percentage = 100.0 / len(wipes) empty_space = PADDING * WRITE_SIZE #f = open(filename, 'w') if dangerous: mode = os.O_WRONLY|os.EX_CANTCREAT else: mode = os.O_RDONLY|os.EX_CANTCREAT fd = os.open(filename, mode) # Don't waste space on a tty, display a progress percentage instead. if sys.stdout.isatty(): end = '\r' else: end = '\n' for offset, length in wipes: progress_counter += percentage sys.stdout.write("wiping position %16d for %16d bytes (%5.1f%%)%s" % (offset, length, progress_counter, end)) #f.seek(offset) os.lseek(fd, offset, 0) #print 'currently at (before) ' + `f.tell()` #print 'currently at (before) ' + `os.tell(fd)` # only write 256kB at a time, since we can stick that in a buffer # and not have Python regenerate HUGE arrays each time if 1: while length >= WRITE_SIZE and length > 0: #f.write(empty_space) #length -= WRITE_SIZE if dangerous: length -= os.write(fd, empty_space) else: length -= WRITE_SIZE #f.write('\xaa' * length) #f.write('hello') if dangerous: os.write(fd, PADDING * length) #print 'currently at (after) ' + `f.tell()` #print 'currently at (after) ' + `os.tell(fd)` #f.close() os.close(fd) if sys.stdout.isatty(): print if verbose: print 'All done! Hopefully your filesystem is not toast.' if __name__ == '__main__': main()