01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
package algs55; // section 5.5
import stdlib.*;
import algs52.TST;
/* ***********************************************************************
* Compilation: javac LZW.java
* Execution: java LZW - < input.txt (compress)
* Execution: java LZW + < input.txt (expand)
* Dependencies: BinaryIn.java BinaryOut.java TST.java
*
* Compress or expand binary input from standard input using LZW.
*
* WARNING: STARTING WITH ORACLE JAVA 7, UPDATE 6 THE SUBSTRING
* METHOD TAKES TIME AND SPACE LINEAR IN THE SIZE OF THE EXTRACTED
* SUBSTRING (INSTEAD OF CONSTANT SPACE AND TIME AS IN EARLIER
* IMPLEMENTATIONS).
*
* See <a href = "http://java-performance.info/changes-to-string-java-1-7-0_06/">this article</a>
* for more details.
*
**************************************************************************/
/**
 * LZW compression and expansion using fixed-width codewords.
 *
 * <p>Codewords {@code 0 .. R-1} stand for the single-character strings,
 * codeword {@code R} is reserved as the end-of-file marker, and codewords
 * {@code R+1 .. L-1} are assigned to longer strings as they are encountered.
 * Every codeword is written/read as a {@code W}-bit value.
 *
 * <p>Both {@link #compress()} and {@link #expand()} operate on the static
 * {@code binaryIn}/{@code binaryOut} streams, which {@link #main(String[])}
 * sets up before dispatching.
 */
public class LZW {
    private static BinaryIn binaryIn;
    private static BinaryOut binaryOut;
    private static final int R = 256;  // number of input chars
    private static final int L = 4096; // number of codewords = 2^W
    private static final int W = 12;   // codeword width

    /**
     * Reads the whole input from {@code binaryIn} as a string, writes its LZW
     * encoding to {@code binaryOut} as a sequence of {@code W}-bit codewords
     * terminated by the EOF codeword {@code R}, and closes {@code binaryOut}.
     *
     * <p>For empty input the output consists of the EOF codeword only.
     */
    public static void compress() {
        String input = binaryIn.readString();

        // Seed the symbol table with every single-character string.
        TST<Integer> st = new TST<>();
        for (int i = 0; i < R; i++) {
            st.put("" + (char) i, i);
        }
        int code = R + 1; // R is codeword for EOF

        while (input.length() > 0) {
            String s = st.longestPrefixOf(input); // Find max prefix match s.
            binaryOut.write(st.get(s), W);        // Print s's encoding.
            int t = s.length();
            // Add s plus the lookahead character to the symbol table, as long as
            // there is a lookahead character and unused codewords remain.
            if (t < input.length() && code < L) {
                st.put(input.substring(0, t + 1), code++);
            }
            // Scan past s in input. Each substring call copies the remainder
            // (see the warning in the file header), so this loop is not linear.
            input = input.substring(t);
        }
        binaryOut.write(R, W);
        binaryOut.close();
    }

    /**
     * Reads {@code W}-bit codewords from {@code binaryIn} until the EOF
     * codeword {@code R} is seen, writes the decoded strings to
     * {@code binaryOut}, and closes {@code binaryOut}.
     *
     * <p>The decoding table is rebuilt on the fly, mirroring the entries that
     * {@link #compress()} added while encoding.
     */
    public static void expand() {
        String[] st = new String[L];
        int i; // next available codeword value

        // initialize symbol table with all 1-character strings
        for (i = 0; i < R; i++) {
            st[i] = "" + (char) i;
        }
        st[i++] = ""; // (unused) lookahead for EOF

        int codeword = binaryIn.readInt(W);
        if (codeword == R) {
            // The compressed stream holds only the EOF marker: the original
            // message was empty. compress() writes nothing after EOF, so we
            // must stop here instead of trying to read another codeword.
            binaryOut.close();
            return;
        }
        String val = st[codeword];

        while (true) {
            binaryOut.write(val);
            codeword = binaryIn.readInt(W);
            if (codeword == R) break;
            String s = st[codeword];
            // Special case: the codeword refers to the entry that is about to
            // be created in this very step, so it is not in the table yet. Its
            // string must be the previous string plus that string's first char.
            if (i == codeword) s = val + val.charAt(0);
            // Record the new entry (previous string + first char of current
            // string) while the table still has room.
            if (i < L) st[i++] = val + s.charAt(0);
            val = s;
        }
        binaryOut.close();
    }

    /**
     * Test driver. The incoming {@code args} are overwritten with a
     * hard-coded mode and the streams are bound to hard-coded files:
     * as written, this compresses {@code data/genomeTiny.txt} into
     * {@code /tmp/genomeTiny.bin}. Swap the commented-out line with the
     * active one to expand the binary file instead.
     *
     * @param args ignored; replaced by the hard-coded mode below
     */
    public static void main(String[] args) {
        String txtFile = "data/genomeTiny.txt";
        String binFile = "/tmp/genomeTiny.bin";
        //args = new String[] { "+" }; binaryIn = new BinaryIn(binFile); binaryOut = new BinaryOut();
        args = new String[] { "-" }; binaryIn = new BinaryIn(txtFile); binaryOut = new BinaryOut(binFile);
        if (args[0].equals("-")) compress();
        else if (args[0].equals("+")) expand();
        else throw new Error("Illegal command line argument");
    }
}
|