|
RLE |
|
import java.io.*; /** * Ported to Java by Tim Tyler, July 2001 - http://mandala.co.uk/birle/<P> * * I modified the program as follows:<BR> * + No longer sensitive" to leading zeros in the file.<BR> * + Now required three consecutive characters (rather than two) * before RLE coding kicks in.<P> * * David Scott's comments follow:<P> * * This has been changed to be bijective and unadulterated * meaning that for any file X then compress(uncompress(X))=X * and uncompress(compress(X))=X. This was not the case in * the earlier version by Mark.<P> * * This mode is done by David A. Scott 2000 Jan 5.<P> * Use at your own risk.<P> * I got it to work using DJGPP GNU C port to DOS.<P> * * The main changes are the EOF treatment and the fact * that very long repeats in the old version are of form * aa255a255a255aN where the a255 repeated for as many times as needed * the undulterated form of this kind of exansion is * aaNaaaa where the trailing a's represent 256 a's.<P> * * These changes are such that the method will always compress * as well as the old RLE or better. In fact for some files with * very long repeats it will compress twice as good. * the first savings occur at repeat of 258 as shown in * example:<P> * baaaa..<258 a's total>...aac<P> * OLD RLE<P> * baa255a0c<P> * NEW RLE<P> * baa0ac<P> * * In the old method aa0b and aa0a have same meaning or the * aa0a form not really used since aa1 is same thing. What the * old method failed to do was make wise use of the patterns * so that information is being added to the file as it compresses * so it should not be used if one is encyrpting since a unadulterated * RLE is available that always matches or beats the old RLE method.<P> * * Another example more obvious:<P> * take the compressed file<P> * aa3a3bc<P> * this when uncompressed and recompressed old way becomes<P> * aa7bc <P> * which means the first file could not be a result of * the current RLE compression meaning if it was the result * of guessing a key for encryption that key used is wrong.<P> * One should avoid use compression methods that aid someone * in breaking the code.<P> * * In the new method<P> * aa3abc<P> * when uncompressed and rcompressed back comes back as same file.<P> * * * RLE.CPP<P> * * Mark Nelson<P> * March 8, 1996<P> * http://web2.airmail.net/markn<P> * * DESCRIPTION<BR> * -----------<P> * * This program performs a Run Length Encoding function on an * input file/stream, and sends the result to an output file * or stream. In the output stream, any two consecutive * characters with the same value flag a run. A byte following * those two characters gives the count of *additional* * repeat characters, which can be anything from 0 to 255.<P> * * Using the RLE program as a front end to BWT avoids * pathologically slow sorts that occur when the input stream * has long sequences of identical characters. (Which means * comparison functions have to spend lots of time on a pair * of strings before deciding who is larger.)<P> * * This program takes two arguments: an input file and an output * file. You can leave off one argument and send your output to * stdout. Leave off two arguments and read your input from stdin * as well.<P> * * This program accompanies my article "Data Compression with the * Burrows-Wheeler Transform."<P> * * Build Instructions<BR> * ------------------<P> * * Define the constant unix for UNIX or UNIX-like systems. The * use of this constant turns off the code used to force the MS-DOS * file system into binary mode. g++ already does this, your UNIX C++ * compiler might also.<P> * * Borland C++ 4.5 16 bit : bcc -w rle.cpp<BR> * Borland C++ 4.5 32 bit : bcc32 -w rle.cpp<BR> * Microsoft Visual C++ 1.52 : cl /W4 rle.cpp<BR> * Microsoft Visual C++ 2.1 : cl /W4 rle.cpp<BR> * g++ : g++ -o rle rle.cpp<P> * * Typical Use<BR> * -----------<P> * * rle < raw-file | bwt | mtf | rle | ari > compressed-file<P> * */ public class RLE { final static boolean report_times = true; final static boolean report_size = true; final static boolean report_filenames = true; FileOutputStream out = null; FileInputStream in = null; byte[] array = new byte[1]; /** * Compresses input_file and store the results in output_file */ public void compress(String input_file, String output_file) throws IOException { long start_time; if (report_filenames) { printf("Original file: " + input_file); printf("Compressed file: " + output_file); } if (report_times) { start_time = System.currentTimeMillis(); } int zerf = 0; int onef = 0; try { in = new FileInputStream(input_file); out = new FileOutputStream(output_file); byte[] _array = new byte[1]; int rb; int last = -1; int lastbo = -2; // int lastbt = -3; // probably hinders rather than helps /* means already first characater was a zero */ /* I would change this to -1 since you could have a file with a single leading zero. If you do you get an extra zero with his value of zero. I am not changeing this to -1 here so that I will exactly match his method for a files where the variable "count" is 255 or less. So except for the possible saveing of a byte at end of file our outputs files will match for many cases */ int c; while ((c = getc()) != -1) {/* Read the next character. */ putc(c); // output it... // if (lastbt == lastbo) { if (lastbo == last) { if (c == last) { int count = 0; while (( c = getc()) >= 0 ) { if (c == last) { count++; } else { break; } } if ( c < 0 ) { /* EOF handling */ if ((count >= 256) && (count % 256 == 0)) { count -=256; } else { if ( count == 0 ) count = -1; } } if (count >=0) { putc(count); } while ((count = count - 256) >= 0) { putc(last); } if (c >= 0) { putc(c); } } } // } // lastbt = lastbo; lastbo = last; last = c; } in.close(); out.close(); } catch(Exception e) { printf("Error while processing file:"); e.printStackTrace(); } if (report_times) { start_time = System.currentTimeMillis() - start_time; printf("Time taken:" + ((start_time / 10) / 100F) + " seconds."); } if (report_size) { File f1 = new File(input_file); File f2 = new File(output_file); long f1l = f1.length(); long f2l = f2.length(); printf("Original file size: " + f1l + " bytes."); printf("Compressed file size: " + f2l + " bytes."); if (f1l > f2l) { printf("Compressed by " + (f1l - f2l) + " bytes."); } else { printf("*** File gained in size by " + (f2l - f1l) + " bytes. ***"); } // fudge divide by zero? if (f1l == 0L) { f1l = 1L; } printf("Compression ratio: " + ((((f1l - f2l) * 10000) / f1l) / 100F) + "%"); } printf(""); } int getc() throws IOException { int rb; if ((rb = in.read(array, 0, 1)) == -1) {/* Read the next character. */ return -1; // EOF flag... } return array[0] & 0xff; } void putc(int i) throws IOException { out.write((byte)i); } static void printf(String s) { System.out.println(s); } /** * Handles the Commmand-line interface to the compressor */ public static void main(String[] args) throws IOException { RLE ca = new RLE(); String input_file = null; String output_file = null; printf("Bijective RLE compressor version of July 12, 2001."); if (args.length > 0) { input_file = args[0]; } else { printf("Error: No input file specified"); System.exit(0); } if (args.length > 1) { output_file = args[1]; } else { printf("Error: No output file specified"); System.exit(0); } ca.compress(input_file, output_file); } }
|
RLE |
|