import java.io.*;

/**
 *  CompressArithmetic.java<P>
 *
 *  Mark Nelson<BR>
 *  March 8, 1996<BR>
 *  http://web2.airmail.net/markn<BR>
 *  **** moded by David Scott in May of 2001 to make bijective<BR>
 * Ported to Java by Tim Tyler, July 2001 - http://mandala.co.uk/biac/<P>
 *  
 * DESCRIPTION<BR>
 * -----------<P>
 *
 *  This program performs an order-0 adaptive arithmetic encoding<BR>
 *  function on an input file/stream, and sends the result to an<BR>
 *  output file or stream.<P>
 *
 *  This program contains the source code from the 1987 CACM
 *  article by Witten, Neal, and Cleary.  I have taken the
 *  source modules and combined them into this single file for
 *  ease of distribution and compilation.  Other than that,
 *  the code is essentially unchanged.<P>
 *
 *  This program takes two arguments: an input file and an output
 *  file.  You can leave off one argument and send your output to
 *  stdout.  Leave off two arguments and read your input from stdin
 *  as well.<P>
 *
 *  This program accompanies my article "Data Compression with the
 *  Burrows-Wheeler Transform."<P>
 *
 * Build Instructions<BR>
 * ------------------<P>
 *
 *  Define the constant unix for UNIX or UNIX-like systems.  The
 *  use of this constant turns off the code used to force the MS-DOS
 *  file system into binary mode.  g++ already does this, and your
 *  UNIX C++ compiler might also.<P>
 *
 *  Borland C++ 4.5 16 bit    : bcc -w ari.cpp<BR>
 *  Borland C++ 4.5 32 bit    : bcc32 -w ari.cpp<BR>
 *  Microsoft Visual C++ 1.52 : cl /W4 ari.cpp<BR>
 *  Microsoft Visual C++ 2.1  : cl /W4 ari.cpp<BR>
 *  g++                       : g++ -o ari ari.cpp<P>
 *
 * Typical Use<BR>
 * -----------<P>
 *
 *  rle < raw-file | bwt | mtf | rle | ari > compressed-file<P>
 *
 */
   public class CompressArithmetic {
      final static int No_of_chars = 256;                 /* Number of character symbols      */
      static FileOutputStream out = null;
   
   // The number of symbols is 256 not 257 EOF is not needed
   /* TRANSLATION TABLES BETWEEN CHARACTERS AND SYMBOL INDEXES. */
      final static int  No_of_symbols = No_of_chars;   /* Total number of symbols          */
   
      static int[] char_to_index = new int[No_of_chars];         /* To index from character          */
      static char[] index_to_char = new char[No_of_symbols+1]; /* To character from index    */
   
   /* ADAPTIVE SOURCE MODEL */
      static long[] freq = new long[No_of_symbols+1];      /* Symbol frequencies                       */
      static long[] cum_freq = new long[No_of_symbols+1];  /* Cumulative symbol frequencies            */
   
   // only 1 to 256 used for the symbols
   
   /* DECLARATIONS USED FOR ARITHMETIC ENCODING AND DECODING */
   
   /* SIZE OF ARITHMETIC CODE VALUES. */
      final static int  Code_value_bits = 62;              /* Number of bits in a code value   */
   
   /* BIT OUTPUT ROUTINES. */
   
   /* THE BIT BUFFER. */
      static int buffer;                     /* Bits buffered for output                 */
      static int bits_to_go;                 /* Number of bits free in buffer            */
      static int zbuffer;                    /* Number of zero bytes buffered up         */
      static int lobuffer;                   /* one if z lo tail in effect */
   
   
   /* INITIALIZE FOR BIT OUTPUT. */
      static void start_outputing_bits() {
         buffer = 0;                                 /* Buffer is empty to start */
         bits_to_go= 8;                              /* with.                    */
         zbuffer = 0;            /* number of zero bytes buffered up */
         lobuffer = 0;           /* one if possible cuttoff */
      }
   
   
   /* OUTPUT A BIT. */
      static void output_bit(int bit) throws IOException
      {   buffer >>= 1; 
         if (bit != 0) buffer |= 0x80;               /* Put bit in top of buffer.*/
         bits_to_go -= 1;
         if (bits_to_go==0) {                        /* Output buffer if it is   */
            if (buffer != 0) {
               if (lobuffer != 0) writeOut(0x40);
               if (zbuffer > 0 && buffer == 0x40) lobuffer = 1; 
               if (buffer != 0x40) lobuffer = 0;     /* 0x40 just because its cool */
               for(;zbuffer > 0; zbuffer--) writeOut(0);
               if (lobuffer != 1) writeOut(buffer);  
            }
            else zbuffer++;
            bits_to_go = 8;
            buffer = 0;
         }
      }
   
   
   /* FLUSH OUT THE LAST BITS. */
      static void done_outputing_bits() throws IOException
      {  /* write out remaining bits most of the time */
         if (buffer != 0)  {
            if (bits_to_go != 0) buffer >>= bits_to_go;
            if (lobuffer != 0) writeOut(0x40);
            if (zbuffer > 0 && buffer == 0x40) lobuffer = 1; 
            if (buffer != 0x40) lobuffer = 0;
            for(;zbuffer > 0; zbuffer--) writeOut(0);
            if (lobuffer != 1) writeOut(buffer);  
         }
      }
   
   /* CURRENT STATE OF THE ENCODING. */
   
      static long low, high, swape;     /* Ends of the current code region          */
      static long bits_to_follow;       /* Number of opposite bits to output after  */
                                        /* the next bit.                            */
   
   /* OUTPUT BITS PLUS FOLLOWING OPPOSITE BITS. */
      static void bit_plus_follow(int bit) throws IOException {
         output_bit(bit);                           /* Output the bit.          */
         while(bits_to_follow>0) {
            output_bit((bit == 0) ? 1 : 0);          /* Output bits_to_follow    */
            bits_to_follow -= 1;                     /* opposite bits. Set       */
         }                                           /* bits_to_follow to zero.  */
      }
   
   
   
   /* ARITHMETIC ENCODING ALGORITHM. */
   
   /* START ENCODING A STREAM OF SYMBOLS. */
      static void start_encoding() {
         low = 0;                                    /* Full code range.         */
         high =(((long)1<<Code_value_bits)-1);
         bits_to_follow = 0;                         /* No bits to follow next.  */
      }
   
   
      static long dss(long r,long top,long bot) throws IOException {
         long t1,t2; /* r could be 64 bits top and bot 32 bits */
                  /* top < bot */
         if (bot == 0) fprintf(" not possible \n");
         t1 = r/bot;
         t2 = r -(t1*bot);
         t1 = t1*top;
         t2 = t2*top;
         t2 = t2/bot;
         return(t1 + t2);
      }
   
   
   /* ENCODE A SYMBOL. */
      static void encode_symbol(int symbol,long cum_freq[]) throws IOException {
         long range;           /* Size of the current code region          */
         range =(long)(high-low)+1;
         high = low   +  dss(range,cum_freq[symbol-1],cum_freq[0]) -1;
         //    (range*cum_freq[symbol-1])/cum_freq[0]-1; 
         low = low +   dss(range, cum_freq[symbol],cum_freq[0]);
         //    (range*cum_freq[symbol])/cum_freq[0];
      
         low ^=(((long)1<<Code_value_bits)-1); /* mapping so largest segment at bottom */
         high ^=(((long)1<<Code_value_bits)-1); /* key is so symbol 2 will have at least */
         swape = low;  /* one "one bit" so bijective at end */
         low = high;
         high = swape;
      
         for(;;) {                                  /* Loop to output bits.     */
            if (high<(2*((((long)1<<Code_value_bits)-1)/4+1))) {
               bit_plus_follow(0);                 /* Output 0 if in low half. */
            }
            else if (low>=(2*((((long)1<<Code_value_bits)-1)/4+1))) {                   /* Output 1 if in high half.*/
               bit_plus_follow(1);
               low -=(2*((((long)1<<Code_value_bits)-1)/4+1));
               high -=(2*((((long)1<<Code_value_bits)-1)/4+1));                       /* Subtract offset to top.  */
            }
            else if (low>=((((long)1<<Code_value_bits)-1)/4+1)                 /* Output an opposite bit   */
                    && high<(3*((((long)1<<Code_value_bits)-1)/4+1))) {              /* later if in middle half. */
               bits_to_follow += 1;
               low -=((((long)1<<Code_value_bits)-1)/4+1);                   /* Subtract offset to middle*/
               high -=((((long)1<<Code_value_bits)-1)/4+1);
            }
            else 
               break;                             /* Otherwise exit loop.     */
            low = 2*low;
            high = 2*high+1;                        /* Scale up code range.     */
         }
         low ^=(((long)1<<Code_value_bits)-1);
         high ^=(((long)1<<Code_value_bits)-1);
         swape = low;
         low = high;
         high = swape;
      }
   
   
   /* FINISH ENCODING THE STREAM. */
      static void done_encoding() throws IOException {   
         if (bits_to_follow > 0) { /* saving at least one bit here */
            bit_plus_follow(1);
         } 
         else
         {
            if (high !=(((long)1<<Code_value_bits)-1)) bit_plus_follow(1);
         }
      }
   
   
   /* THE ADAPTIVE SOURCE MODEL */
   
   /* INITIALIZE THE MODEL. */
      static void start_model() {
         int i;
         for(i = 0; i<No_of_chars; i++) {           /* Set up tables that       */
            char_to_index[i] = i+1;                 /* translate between symbol */
            index_to_char[i+1] =(char) i; /* indexes and characters.  */
         }
      
         for(i = 0; i<=No_of_symbols; i++) {        /* Set up initial frequency */
            freq[i] = 1;                            /* counts to be one for all */
            cum_freq[i] = No_of_symbols-i;          /* symbols.                 */
         }
      
         freq[0] = 0;                                /* Freq[0] must not be the  */
      }                                               /* same as freq[1].         */
   
   
   /* UPDATE THE MODEL TO ACCOUNT FOR A NEW SYMBOL. */
      static void update_model(int symbol) throws IOException {   
         int i;                      /* New index for symbol                     */
         for(i = symbol; freq[i]==freq[i-1]; i--) ; /* Find symbol's new index. */
         if (i<symbol) {
            int ch_i, ch_symbol;
            ch_i = index_to_char[i];                /* Update the translation   */
            ch_symbol = index_to_char[symbol];      /* tables if the symbol has */
            index_to_char[i] =(char) ch_symbol;           /* moved.                   */
            index_to_char[symbol] =(char) ch_i;
            char_to_index[ch_i] = symbol;
            char_to_index[ch_symbol] = i;
         }
         freq[i] ++;                               /* Increment the frequency  */
         while(i>0) {                               /* count for the symbol and */
            i -= 1;                                 /* update the cumulative    */
            cum_freq[i] ++;                       /* frequencies.             */
         }
      }
   
   
      static void fprintf(String s) {
         System.out.println(s);
      }
   
   
   /*
      static void fprintf(String s) {
         Log.log(s);
      }
   
   
      static void putc(char c) {
         Log.log("" + c);
      }
   
      static void putc(byte b) {
         Log.log("" + b);
      }
   
   
      static void putc(int i) {
         Log.log("" +(byte)i);
      }
   */
   
   
      static void writeOut(int i) throws IOException {
         out.write((byte)i);
      }
   
   
   /**
    * Compresses input_file and store the results in output_file
    */
      public static void compress(String input_file, String output_file) throws IOException {
         int zerf = 0;
         int onef = 0;
      
         FileInputStream in = null;
         try {
            in = new FileInputStream(input_file);
            out = new FileOutputStream(output_file);
         
            byte[] _array = new byte[1];
         
            int rb;
         
            start_model();                              /* Set up other modules.    */
            start_outputing_bits();
            start_encoding();
            for(;;) {                                  /* Loop through characters. */
               int ch;
               int symbol;
            
               if ((rb = in.read(_array, 0, 1)) == -1) {/* Read the next character. */
                  break;
               }
            
               ch = _array[0] & 0xff;
            
               symbol = char_to_index[ch];             /* Translate to an index.   */
               if (symbol == 1) zerf = 1;
               if (zerf == 1 && symbol == 2) onef = 1;
               if (symbol > 1) zerf = 0; 
               if (symbol > 2) onef = 0; 
               encode_symbol(symbol,cum_freq);         /* Encode that symbol.      */
               update_model(symbol);                   /* Update the model.        */
            }
         
            if ((zerf + onef) > 0) {
               encode_symbol(2, cum_freq);
               update_model(2);
            }
         
            // encode_symbol(EOF_symbol,cum_freq);     /* Encode the EOF symbol.   */
            done_encoding();                           /* Send the last few bits.  */
            done_outputing_bits();
         
            in.close();
            out.close();
         }
            catch(Exception e) {
               fprintf("Error while processing file:");
               e.printStackTrace();
            }
      }
   
   
   /**
    * Handles the Commmand-line interface to the compressor
    */
      public static void main(String[] args) throws IOException {
         String input_file = null;
         String output_file = null;
      
         fprintf("Bijective Arithmetic coding version May 30, 2001\n");
         fprintf("Arithmetic coding from ");
      
         if (args.length > 0) {
            input_file = args[0];
            fprintf(args[0]);
         } 
         else {
            fprintf("Error: No input file specified");
            System.exit(0);
         }
         fprintf(" to ");
         if (args.length > 1) {
            output_file = args[1];
            fprintf(args[1]);
         } 
         else {
            fprintf("Error: No output file specified");
            System.exit(0);
         }
      
         fprintf("\n");
      
         compress(input_file, output_file);
      }
   
   }