001package algs55; // section 5.5 002import stdlib.*; 003import algs52.TST; 004/* *********************************************************************** 005 * Compilation: javac LZW.java 006 * Execution: java LZW - < input.txt (compress) 007 * Execution: java LZW + < input.txt (expand) 008 * Dependencies: BinaryIn.java BinaryOut.java 009 * 010 * Compress or expand binary input from standard input using LZW. 011 * 012 * WARNING: STARTING WITH ORACLE JAVA 6, UPDATE 7 the SUBSTRING 013 * METHOD TAKES TIME AND SPACE LINEAR IN THE SIZE OF THE EXTRACTED 014 * SUBSTRING (INSTEAD OF CONSTANT SPACE AND TIME AS IN EARLIER 015 * IMPLEMENTATIONS). 016 * 017 * See <a href = "http://java-performance.info/changes-to-string-java-1-7-0_06/">this article</a> 018 * for more details. 019 * 020 **************************************************************************/ 021 022public class LZW { 023 private static BinaryIn binaryIn; 024 private static BinaryOut binaryOut; 025 026 private static final int R = 256; // number of input chars 027 private static final int L = 4096; // number of codewords = 2^W 028 private static final int W = 12; // codeword width 029 030 public static void compress() { 031 String input = binaryIn.readString(); 032 TST<Integer> st = new TST<>(); 033 for (int i = 0; i < R; i++) 034 st.put("" + (char) i, i); 035 int code = R+1; // R is codeword for EOF 036 037 while (input.length() > 0) { 038 String s = st.longestPrefixOf(input); // Find max prefix match s. 039 binaryOut.write(st.get(s), W); // Print s's encoding. 040 int t = s.length(); 041 if (t < input.length() && code < L) // Add s to symbol table. 042 st.put(input.substring(0, t + 1), code++); 043 input = input.substring(t); // Scan past s in input. 044 } 045 binaryOut.write(R, W); 046 binaryOut.close(); 047 } 048 049 050 public static void expand() { 051 String[] st = new String[L]; 052 int i; // next available codeword value 053 054 // initialize symbol table with all 1-character strings 055 for (i = 0; i < R; i++) 056 st[i] = "" + (char) i; 057 st[i++] = ""; // (unused) lookahead for EOF 058 059 int codeword = binaryIn.readInt(W); 060 String val = st[codeword]; 061 062 while (true) { 063 binaryOut.write(val); 064 codeword = binaryIn.readInt(W); 065 if (codeword == R) break; 066 String s = st[codeword]; 067 if (i == codeword) s = val + val.charAt(0); // special case hack 068 if (i < L) st[i++] = val + s.charAt(0); 069 val = s; 070 } 071 binaryOut.close(); 072 } 073 074 075 076 public static void main(String[] args) { 077 String txtFile = "data/genomeTiny.txt"; 078 String binFile = "/tmp/genomeTiny.bin"; 079 //args = new String[] { "+" }; binaryIn = new BinaryIn(binFile); binaryOut = new BinaryOut(); 080 args = new String[] { "-" }; binaryIn = new BinaryIn(txtFile); binaryOut = new BinaryOut(binFile); 081 if (args[0].equals("-")) compress(); 082 else if (args[0].equals("+")) expand(); 083 else throw new Error("Illegal command line argument"); 084 } 085 086}