#!/usr/bin/perl
# msexpand written by Paul Laufer, 2001, to help him learn perl ;)
# - added fillup with zeros if expanded file is less than original;
# - added binmode() to work on platforms which need it; by G. Knauf.
# last change: 10-May-2002 gk.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# Microsoft compress file structure:
# Uses a 12 bit (4k) sliding window Lempel Ziv variant.
#
# header: 14 bytes
#   int MAGIC1 = 0x44445a53;  // "SZDD"
#   int MAGIC2 = 0x3327f088;
#   char MAGIC3 = 0x41;
#   char last_char_of_filename;  // offset 0x09
#   unsigned short int size_low;
#   unsigned short int size_high;
#
# data portion:
# Flag byte followed by eight data elements, for each of the eight flag bits.
# If a flag bit is 1, the corresponding element is a data byte (just copied).
# If the flag bit is 0, the corresponding element is a code, comprised of two
# bytes.  The two bytes are divided into two parts.  The upper 12 bits are the
# offset into the 4k window, and the lower 4 bits are the length of the string,
# minus 3.  (In byte terms: the first byte holds the low 8 bits of the offset;
# the high nibble of the second byte holds offset bits 8-11 and its low nibble
# holds the length minus 3.)  Thus the string length is between 3 and 18 bytes.
# Flag byte    Eight data elements
# 10010011
# |||||||+----byte
# ||||||+-----byte
# |||||+------code
# ||||+-------code
# |||+--------byte
# ||+---------code
# |+----------code
# +-----------byte
#
# The window is wrapped, ie: with an offset of 4092 and a length of 10, you
# will get the last 4 bytes then the first 6 bytes of the window in the output.
#
# Of course, the window must be updated with expanded strings and bytes as they
# are read.  Oh, yeah, the window needs to be initialized with spaces, not
# zeros.

use strict;
use warnings;

our $MAGIC1   = 0x44445a53;    # bytes "SZDD" read as a little-endian 32-bit value
our $MAGIC2   = 0x3327f088;
our $MAGIC3   = 0x41;
our $WINSIZE  = 4096;          # Window Size
our $HEADSIZE = 14;            # Size of file header

# LENGTH($byte2)
# Match length encoded in the low nibble of the second code byte (3..18).
sub LENGTH {
    my ($x) = @_;
    return (($x & 0x0F) + 3);
}

# OFFSET($byte1, $byte2)
# Window index encoded in a two-byte code: $byte1 supplies the low 8 bits,
# the high nibble of $byte2 supplies bits 8-11.  0x10 is added (mod 4096)
# before the result is used as an index into the window that LZ_expand
# fills starting at index 0.
sub OFFSET {
    my ($x1, $x2) = @_;
    return (((($x2 & 0xF0) << 4) + $x1 + 0x0010) & 0x0FFF);
}

# WRAPFIX($pos)
# Wrap a position into the window (the window size is a power of two).
sub WRAPFIX {
    my ($x) = @_;
    return ($x & ($WINSIZE - 1));
}

# BITSET($byte, $bit)
# True when bit number $bit (0 = least significant) is set in $byte.
sub BITSET {
    my ($byte, $bit) = @_;
    return (($byte & (1 << $bit)) > 0);
}

# LZ_expand($input, $size_uncomp [, $out_fh])
#
# Expands the compressed data that follows the $HEADSIZE-byte header in
# $input and writes the result to $out_fh (defaults to the OUTFILE handle,
# which is what callers using the two-argument form rely on).
#
#   $input       - complete contents of the compressed file, header included
#   $size_uncomp - expanded size announced by the header; output never
#                  exceeds it
#   $out_fh      - optional output filehandle
#
# Returns the number of bytes produced.  This is less than $size_uncomp when
# the input ends early; the caller decides how to pad.
# Dies if writing to the output handle fails.
sub LZ_expand {
    my ($input, $size_uncomp, $out_fh) = @_;

    if (!defined $out_fh) {
        no warnings 'once';
        $out_fh = \*OUTFILE;
    }

    my $size_in = length $input;

    # The window holds byte values, so it is filled with the code for a
    # space (0x20).  Storing the string ' ' would be emitted as NUL.
    my @window = (0x20) x $WINSIZE;

    my $curr_pos = 0;            # bytes produced so far
    my $index    = $HEADSIZE;    # read position in $input
    my $output   = '';           # collected output, written in one go

  GROUP:
    while ($curr_pos < $size_uncomp && $index < $size_in) {
        my $bit_map = unpack('C', substr($input, $index, 1));
        $index++;

        for my $bit (0 .. 7) {
            last GROUP if $index >= $size_in || $curr_pos >= $size_uncomp;

            if (BITSET($bit_map, $bit)) {
                # Its just a data byte
                my $byte = unpack('C', substr($input, $index, 1));
                $index++;
                $window[WRAPFIX($curr_pos)] = $byte;
                $output .= chr($byte);
                $curr_pos++;
            }
            else {
                # Its a code, so process.  Both code bytes must be present;
                # a code occupying the very last two bytes of the input is
                # still expanded.
                last GROUP if $index + 2 > $size_in;
                my ($byte1, $byte2) = unpack('CC', substr($input, $index, 2));
                $index += 2;

                my $length   = LENGTH($byte2);
                my $location = OFFSET($byte1, $byte2);
                while ($length > 0 && $curr_pos < $size_uncomp) {
                    my $byte = $window[WRAPFIX($location)];
                    $window[WRAPFIX($curr_pos)] = $byte;
                    $output .= chr($byte);
                    $curr_pos++;
                    $location++;
                    $length--;
                }
            }
        }
    }

    print {$out_fh} $output or die "Can't write expanded data: $!\n";
    return $curr_pos;
}

# Print the usage text shown when no input file is given.
sub _print_usage {
    print "Microsoft Compressed File Expander\n";
    print "Written by Paul Laufer 2001-03-13\n\n";
    print "Usage:\n\tmsexpand.pl FILE\n\n";
    print "Where FILE is a valid Microsoft Compressed file. Files of this type\nusually have the last letter replaced with an underscore, ie rmquasar.vx_.\nThe expanded file will be the original filename with the last underscore\nreplaced with the original letter, ie rmquasar.vxd.\n\n";
    return;
}

# Script entry point: validates the header of the file named by $ARGV[0],
# derives the output name from the stored last character, expands the data
# and pads the result with zero bytes up to the size stated in the header.
sub main {
    if (@ARGV < 1) {
        _print_usage();
        exit(1);
    }

    my $infile = $ARGV[0];
    open(my $in_fh, '<', $infile)
        or die "Can't open file for input: $infile: $!\n";
    binmode($in_fh);
    my $input = do { local $/; <$in_fh> };
    close($in_fh);
    $input = '' unless defined $input;

    # All header fields are little-endian regardless of the host platform.
    my ($magic1, $magic2, $magic3, $lastchar, $size_uncomp);
    if (length($input) >= $HEADSIZE) {
        ($magic1, $magic2, $magic3, $lastchar, $size_uncomp) =
            unpack('VVCa1V', substr($input, 0, $HEADSIZE));
    }

    if (   !defined $magic1
        || $magic1 != $MAGIC1
        || $magic2 != $MAGIC2
        || $magic3 != $MAGIC3)
    {
        print "Error: Input file is not a Microsoft Compress format.\n";
        exit(1);
    }
    print "Input file appears to be Microsoft Compress format, proceeding\n";

    my $outfile = $infile;
    substr($outfile, -1, 1) = $lastchar;
    print "Output filename = ", $outfile, "\n";

    my $size_orig = length($input);
    print "Original file size: ", $size_uncomp, " bytes\n";
    # A header announcing zero bytes must not cause a division by zero.
    printf "Compression ratio: %.1f%%\n",
        ($size_uncomp ? $size_orig * 100 / $size_uncomp : 0);

    open(my $out_fh, '>', $outfile)
        or die "Can't open file for output: $outfile: $!\n";
    binmode($out_fh);

    # Time to start expanding the file
    my $size_expand = LZ_expand($input, $size_uncomp, $out_fh);
    printf("Expanded size: %d bytes\n", $size_expand);

    if ($size_expand < $size_uncomp) {
        print {$out_fh} ("\x0" x ($size_uncomp - $size_expand))
            or die "Can't write to $outfile: $!\n";
    }
    close($out_fh) or die "Can't close $outfile: $!\n";

    printf("Output file size: %d bytes\n", (stat($outfile))[7]);
    return;
}

# start here (only when run as a script, not when loaded by other code)
main() unless caller;