#!/usr/bin/env python
# Copyright Paul Sladen, 2005-05-14
# You may use this work under the terms of the GNU GPL.
#
# Provenance: imported into livecd-rootfs 0.24 by LaMont Jones (commit
# 98e4c5b8, 2005-09-08, "e2fs-zero.py from Paul Sladen") from
# http://www.paul.sladen.org/ubuntu/e2fszero/e2fs-zero.py.  The same commit
# noted the import in debian/changelog ("Print package version during script
# run.") and added `dpkg -l livecd-rootfs` to livecd.sh so that the package
# version appears in the build log.
#
# Synopsis:
#  1. call dumpe2fs /dev/xxxx | grep -E '^( Free blocks: |Block size:)'
#  2. decode Block size, eg. 4096 bytes
#  3. decode ranges of Free blocks, like: 123, 132-145, 149-150, 167
#  4. open '/dev/xxxx' for writing
#  5. seek to each location (block_number * block_size) and write lots of NUL
#  6. profit
#
# The code below sticks to constructs that parse on both Python 2.6+ and
# Python 3 (no print statement, no backticks, no string exceptions).

import os
import subprocess
import sys

# Usage text.  It is kept in an explicit constant (and mirrored into __doc__)
# so that printing the help never depends on the string being the very first
# statement of the module.  "(default)" sits on --dryrun because `dangerous`
# below starts out False: a run without -w never writes.
USAGE = """\
e2fszero 0.1 (2005-05-14)
Usage: e2fs-zero [-h] [-v] [-w|-n] ext2-filesystem
Zero unused blocks in an Ext2 Filesystem, to increase compression and rsyncability.
  -h --help     this message
  -v --verbose  extra information
  -n --dryrun   disable writing to the filesystem (default)
  -w --write    enable writing to the filesystem
Note: This program relies on 'dumpe2fs' to do the dangerous calculations!
NOTE: YES, THIS PROGRAM REALLY WILL OVERWRITE (bits of) YOUR FILESYSTEM WITH NULLS\
"""
__doc__ = USAGE

DUMPE2FS = '/sbin/dumpe2fs'

# messages
verbose = False
# enable writing operations
dangerous = False

# Zeros are written in chunks of this many bytes so that one buffer can be
# reused instead of building a huge string per free range.
WRITE_SIZE = 2**18
PADDING = b'\0'


class E2fsZeroError(Exception):
    """dumpe2fs could not be run, or its output could not be trusted."""


def _out(message):
    """Write one line to stdout."""
    sys.stdout.write(message + '\n')


def _usage_error():
    """Print the usage text on stderr and exit with status 2."""
    sys.stderr.write(USAGE + '\n')
    sys.exit(2)


def _run_dumpe2fs(filesystem):
    """Run dumpe2fs on *filesystem* and return its stdout as a list of lines.

    Raises E2fsZeroError if the filesystem image or the dumpe2fs binary is
    missing, if dumpe2fs cannot be started, or if it exits non-zero.
    dumpe2fs' own stderr (including its banner) is passed through unchanged.
    """
    for path in (filesystem, DUMPE2FS):
        try:
            os.stat(path)
        except OSError as err:
            raise E2fsZeroError("can't access %s (%s), try --help" % (path, err))

    # dumpe2fs prints its banner on stderr; prefix it so the log reads
    # "calling dumpe2fs <version> ...".
    sys.stderr.write('calling ')
    sys.stderr.flush()
    try:
        # Argument list, not a shell string: the filename is never parsed by
        # a shell, so quotes or metacharacters in it are harmless.
        proc = subprocess.Popen([DUMPE2FS, filesystem],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError as err:
        raise E2fsZeroError("can't run %s (%s)" % (DUMPE2FS, err))
    output = proc.communicate()[0]
    if proc.returncode != 0:
        raise E2fsZeroError("%s exited with status %d"
                            % (DUMPE2FS, proc.returncode))
    return output.splitlines()


def _parse_dumpe2fs(lines):
    """Extract (free_blocks, block_size, free_ranges) from dumpe2fs output.

    The lines of interest look like this:

        Filesystem volume name:   <none>        (must be the first line)
        Free blocks:              134859        (superblock summary)
        Block size:               4096
          Free blocks: 1123, 1345-1456, 1567    (indented, one per group)

    Summary lines start in column 0; per-group lines are indented.  That is
    how the two kinds of 'Free blocks:' line are told apart.

    *free_ranges* is the flat list of range tokens ('1123', '1345-1456', ...)
    from every group, in order.  Raises E2fsZeroError when the header, the
    free-block count or the block size is missing or not a number.
    """
    lines = list(lines)
    if not lines or not lines[0].startswith('Filesystem volume name:'):
        raise E2fsZeroError("Failed to parse correct dumpe2fs output")

    free_blocks = None
    block_size = None
    free_ranges = []
    for line in lines:
        label, colon, value = line.partition(':')
        if not colon:
            continue
        key = label.strip()
        indented = line[:1].isspace()
        if key == 'Free blocks' and indented:
            # A group with no free space has an empty list after the colon.
            for token in value.split(','):
                token = token.strip()
                if token:
                    free_ranges.append(token)
        elif key == 'Free blocks' and free_blocks is None:
            try:
                free_blocks = int(value.strip())
            except ValueError:
                raise E2fsZeroError(
                    "Failed to parse unused block count ('Free blocks:')")
        elif key == 'Block size' and not indented and block_size is None:
            try:
                block_size = int(value.strip())
            except ValueError:
                raise E2fsZeroError(
                    "Failed to parse filesystem block-size ('Block size:')")

    if free_blocks is None:
        raise E2fsZeroError(
            "Failed to parse unused block count ('Free blocks:')")
    if block_size is None:
        raise E2fsZeroError(
            "Failed to parse filesystem block-size ('Block size:')")
    return free_blocks, block_size, free_ranges


def _ranges_to_wipes(free_ranges, block_size):
    """Turn range tokens into byte extents.

    Each token is either a single block number ('199') or an inclusive range
    ('172-184').  Returns (wipes, block_count): *wipes* is a list of
    [byte_offset, byte_length] pairs and *block_count* is the total number of
    blocks they cover.

    Any token that cannot be decoded raises E2fsZeroError.  The result is
    used to overwrite the disk, so guessing is not an option.
    """
    wipes = []
    block_count = 0
    for token in free_ranges:
        if not token:
            continue
        first_text, dash, last_text = token.partition('-')
        try:
            first = int(first_text)
            if dash:
                last = int(last_text)
            else:
                last = first
        except ValueError:
            raise E2fsZeroError(
                "Failed to decode free block range %r" % (token,))
        if first < 0 or last < first:
            raise E2fsZeroError(
                "Failed to decode free block range %r" % (token,))
        count = last - first + 1
        wipes.append([block_size * first, block_size * count])
        block_count += count
    return wipes, block_count


def main():
    """Command-line entry point: parse options, ask dumpe2fs, wipe free space.

    Reads sys.argv.  Sets the module globals `verbose` and `dangerous` from
    -v / -n / -w.  Exits 0 after -h/--help, and 2 (usage on stderr) for an
    unknown option or when exactly one filesystem argument is not given.
    Raises E2fsZeroError when dumpe2fs output is unusable or when the decoded
    free ranges do not add up to the free-block count in the superblock.
    """
    global verbose, dangerous

    leftover = []
    for arg in sys.argv[1:]:
        if arg in ('-v', '--verbose'):
            verbose = True
        elif arg in ('-n', '--dryrun'):
            dangerous = False
        elif arg in ('-w', '--write'):
            dangerous = True
        elif arg in ('-h', '--help'):
            _out(USAGE)
            sys.exit(0)
        elif arg.startswith('-'):
            _usage_error()
        else:
            leftover.append(arg)

    if len(leftover) != 1 or not leftover[0]:
        _usage_error()
    filesystem = leftover[0]

    free_blocks, block_size, free_ranges = _parse_dumpe2fs(
        _run_dumpe2fs(filesystem))
    if verbose:
        _out("Detected filsystem contains %d free blocks" % (free_blocks))
        _out("Detected filsystem block_size = %d bytes" % (block_size))

    wipes, free_block_count = _ranges_to_wipes(free_ranges, block_size)
    if verbose:
        _out('%d offset/length pairs %r' % (len(wipes), wipes))
    if verbose or free_blocks != free_block_count:
        _out("Free blocks; parsed: %d, decoded: %d"
             % (free_blocks, free_block_count))
    # Last line of defence before touching the disk: the per-group ranges
    # must account for exactly the number of free blocks in the superblock.
    if free_blocks != free_block_count:
        raise E2fsZeroError(
            'Decoded Free blocks do not match count in filesystem!')
    perform_wipe(filesystem, wipes)


# Plain POSIX os.open/os.lseek/os.write are used on purpose: the original
# author saw buffered file objects truncate the image unpredictably.
#
# It would be handy to punch holes (make the areas sparse) so that they do
# not take up space on disk at all; that is not done here.

def perform_wipe(filename, wipes=None, write=None):
    """Overwrite the given byte extents of *filename* with NUL bytes.

    filename -- filesystem image or block device; it must already exist.
    wipes    -- iterable of [offset, length] pairs in bytes.  None means the
                single no-op extent [[0, 0]].  An empty list is allowed.
    write    -- True to really write, False for a dry run that only reports
                progress.  None (the default) follows the module-level
                `dangerous` flag.

    The file is opened O_WRONLY when writing and O_RDONLY otherwise.  It is
    never created or truncated.  One progress line per extent goes to stdout,
    '\\r'-terminated on a tty.  Raises OSError if the file cannot be opened or
    written, and IOError if a write makes no progress.
    """
    if wipes is None:
        wipes = [[0, 0]]
    if write is None:
        write = dangerous

    if wipes:
        percentage = 100.0 / len(wipes)
    else:
        percentage = 0.0
    progress_counter = 0.0
    empty_space = PADDING * WRITE_SIZE

    if write:
        flags = os.O_WRONLY
    else:
        flags = os.O_RDONLY
    fd = os.open(filename, flags)

    # Don't waste space on a tty, display a progress percentage instead.
    if sys.stdout.isatty():
        end = '\r'
    else:
        end = '\n'

    try:
        for offset, length in wipes:
            progress_counter += percentage
            sys.stdout.write(
                "wiping position %16d for %16d bytes (%5.1f%%)%s"
                % (offset, length, progress_counter, end))
            if not write:
                continue
            os.lseek(fd, offset, os.SEEK_SET)
            # os.write may write less than asked; keep going until the whole
            # extent is zeroed.
            while length > 0:
                if length >= WRITE_SIZE:
                    chunk = empty_space
                else:
                    chunk = empty_space[:length]
                written = os.write(fd, chunk)
                if written <= 0:
                    raise IOError("write to %s made no progress at offset %d"
                                  % (filename, offset))
                length -= written
    finally:
        os.close(fd)

    if sys.stdout.isatty():
        sys.stdout.write('\n')
    if verbose:
        _out('All done! Hopefully your filesystem is not toast.')


if __name__ == '__main__':
    try:
        main()
    except E2fsZeroError as err:
        sys.stderr.write('e2fs-zero: %s\n' % (err,))
        sys.exit(1)