001package algs55; // section 5.5
002import stdlib.*;
003import algs52.TST;
004/* ***********************************************************************
005 *  Compilation:  javac LZW.java
006 *  Execution:    java LZW - < input.txt   (compress)
007 *  Execution:    java LZW + < input.txt   (expand)
008 *  Dependencies: BinaryIn.java BinaryOut.java
009 *
010 *  Compress or expand binary input from standard input using LZW.
011 *
012 *  WARNING: STARTING WITH ORACLE JAVA 6, UPDATE 7 the SUBSTRING
013 *  METHOD TAKES TIME AND SPACE LINEAR IN THE SIZE OF THE EXTRACTED
014 *  SUBSTRING (INSTEAD OF CONSTANT SPACE AND TIME AS IN EARLIER
015 *  IMPLEMENTATIONS).
016 *
017 *  See <a href = "http://java-performance.info/changes-to-string-java-1-7-0_06/">this article</a>
018 *  for more details.
019 *
020 **************************************************************************/
021
022public class LZW {
023        private static BinaryIn binaryIn;
024        private static BinaryOut binaryOut;
025
026        private static final int R = 256;        // number of input chars
027        private static final int L = 4096;       // number of codewords = 2^W
028        private static final int W = 12;         // codeword width
029
030        public static void compress() {
031                String input = binaryIn.readString();
032                TST<Integer> st = new TST<>();
033                for (int i = 0; i < R; i++)
034                        st.put("" + (char) i, i);
035                int code = R+1;  // R is codeword for EOF
036
037                while (input.length() > 0) {
038                        String s = st.longestPrefixOf(input);  // Find max prefix match s.
039                        binaryOut.write(st.get(s), W);         // Print s's encoding.
040                        int t = s.length();
041                        if (t < input.length() && code < L)    // Add s to symbol table.
042                                st.put(input.substring(0, t + 1), code++);
043                        input = input.substring(t);            // Scan past s in input.
044                }
045                binaryOut.write(R, W);
046                binaryOut.close();
047        }
048
049
050        public static void expand() {
051                String[] st = new String[L];
052                int i; // next available codeword value
053
054                // initialize symbol table with all 1-character strings
055                for (i = 0; i < R; i++)
056                        st[i] = "" + (char) i;
057                st[i++] = "";                        // (unused) lookahead for EOF
058
059                int codeword = binaryIn.readInt(W);
060                String val = st[codeword];
061
062                while (true) {
063                        binaryOut.write(val);
064                        codeword = binaryIn.readInt(W);
065                        if (codeword == R) break;
066                        String s = st[codeword];
067                        if (i == codeword) s = val + val.charAt(0);   // special case hack
068                        if (i < L) st[i++] = val + s.charAt(0);
069                        val = s;
070                }
071                binaryOut.close();
072        }
073
074
075
076        public static void main(String[] args) {
077                String txtFile = "data/genomeTiny.txt";
078                String binFile = "/tmp/genomeTiny.bin";
079                //args = new String[] { "+" }; binaryIn = new BinaryIn(binFile); binaryOut = new BinaryOut();
080                args = new String[] { "-" }; binaryIn = new BinaryIn(txtFile); binaryOut = new BinaryOut(binFile);
081                if      (args[0].equals("-")) compress();
082                else if (args[0].equals("+")) expand();
083                else throw new Error("Illegal command line argument");
084        }
085
086}