001package algs55; // section 5.5 002import stdlib.*; 003import algs24.MinPQ; 004/* *********************************************************************** 005 * Compilation: javac Huffman.java 006 * Execution: java Huffman - < input.txt (compress) 007 * Execution: java Huffman + < input.txt (expand) 008 * Dependencies: BinaryIn.java BinaryOut.java 009 * Data files: http://algs4.cs.princeton.edu/55compression/abra.txt 010 * http://algs4.cs.princeton.edu/55compression/tinytinyTale.txt 011 * 012 * Compress or expand a binary input stream using the Huffman algorithm. 013 * 014 * % java Huffman - < abra.txt | java BinaryDump 60 015 * 010100000100101000100010010000110100001101010100101010000100 016 * 000000000000000000000000000110001111100101101000111110010100 017 * 120 bits 018 * 019 * % java Huffman - < abra.txt | java Huffman + 020 * ABRACADABRA! 021 * 022 *************************************************************************/ 023 024public class Huffman { 025 private static BinaryIn binaryIn; 026 private static BinaryOut binaryOut; 027 028 // alphabet size of extended ASCII 029 private static final int R = 256; 030 031 // Huffman trie node 032 private static class Node implements Comparable<Node> { 033 public final char ch; 034 public final int freq; 035 public final Node left, right; 036 037 public Node(char ch, int freq, Node left, Node right) { 038 this.ch = ch; 039 this.freq = freq; 040 this.left = left; 041 this.right = right; 042 } 043 044 // is the node a leaf node? 045 private boolean isLeaf() { 046 assert (left == null && right == null) || (left != null && right != null); 047 return (left == null && right == null); 048 } 049 050 // compare, based on frequency 051 public int compareTo(Node that) { 052 return this.freq - that.freq; 053 } 054 } 055 056 057 // compress bytes from standard input and write to standard output 058 public static void compress() { 059 // read the input 060 String s = binaryIn.readString(); 061 char[] input = s.toCharArray(); 062 063 // tabulate frequency counts 064 int[] freq = new int[R]; 065 for (int i = 0; i < input.length; i++) 066 freq[input[i]]++; 067 068 // build Huffman trie 069 Node root = buildTrie(freq); 070 writeTrie(root); 071 printTrie (root); 072 073 // build code table 074 String[] st = new String[R]; 075 buildCode(st, root, ""); 076 077 // print number of bytes in original uncompressed message 078 binaryOut.write(input.length); 079 StdOut.print ("Length: " + input.length); 080 081 // use Huffman code to encode input 082 for (int i = 0; i < input.length; i++) { 083 String code = st[input[i]]; 084 for (int j = 0; j < code.length(); j++) { 085 if (code.charAt(j) == '0') { 086 binaryOut.write(false); 087 } 088 else if (code.charAt(j) == '1') { 089 binaryOut.write(true); 090 } 091 else throw new IllegalStateException("Illegal state"); 092 } 093 } 094 095 // close output stream 096 binaryOut.close(); 097 } 098 099 // expand Huffman-encoded input from standard input and write to standard output 100 public static void expand() { 101 102 // read in Huffman trie from input stream 103 Node root = readTrie(); 104 printTrie (root); 105 106 // number of bytes to write 107 int length = binaryIn.readInt(); 108 109 StdOut.print ("Length: " + length); 110 // decode using the Huffman trie 111 for (int i = 0; i < length; i++) { 112 Node x = root; 113 while (!x.isLeaf()) { 114 boolean bit = binaryIn.readBoolean(); 115 if (bit) x = x.right; 116 else x = x.left; 117 } 118 binaryOut.write(x.ch); 119 } 120 binaryOut.close(); 121 } 122 123 // build the Huffman trie given frequencies 124 private static Node buildTrie(int[] freq) { 125 126 // initialze priority queue with singleton trees 127 MinPQ<Node> pq = new MinPQ<>(); 128 for (char i = 0; i < R; i++) 129 if (freq[i] > 0) 130 pq.insert(new Node(i, freq[i], null, null)); 131 132 // merge two smallest trees 133 while (pq.size() > 1) { 134 Node left = pq.delMin(); 135 Node right = pq.delMin(); 136 Node parent = new Node('\0', left.freq + right.freq, left, right); 137 pq.insert(parent); 138 } 139 return pq.delMin(); 140 } 141 142 143 // make a lookup table from symbols and their encodings 144 private static void buildCode(String[] st, Node x, String s) { 145 if (!x.isLeaf()) { 146 buildCode(st, x.left, s + '0'); 147 buildCode(st, x.right, s + '1'); 148 } 149 else { 150 st[x.ch] = s; 151 } 152 } 153 154 // write bitstring-encoded trie to standard output 155 private static void writeTrie(Node x) { 156 //StdOut.println ("wrote " + x.isLeaf ()); 157 if (x.isLeaf()) { 158 //StdOut.println ("wrote " + x.ch); 159 binaryOut.write(true); 160 binaryOut.write(x.ch, 8); 161 } else { 162 binaryOut.write(false); 163 writeTrie(x.left); 164 writeTrie(x.right); 165 } 166 } 167 168 private static Node readTrie() { 169 boolean isLeaf = binaryIn.readBoolean(); 170 //StdOut.println ("read " + isLeaf); 171 if (isLeaf) { 172 char ch = binaryIn.readChar(8); 173 //StdOut.println ("read " + ch); 174 return new Node(ch, -1, null, null); 175 } else { 176 Node left = readTrie(); 177 Node right = readTrie(); 178 return new Node('\0', -1, left, right); 179 } 180 } 181 private static void printTrie(Node x) { printTrie(x, ""); } 182 private static void printTrie(Node x, String pre) { 183 if (x.isLeaf()) { 184 StdOut.format ("%c %s\n", x.ch, pre); 185 } 186 if (x.left!=null) printTrie(x.left, pre + "0"); 187 if (x.right!=null) printTrie(x.right, pre + "1"); 188 } 189 190 // seems to be broken... 191 public static void main(String[] args) { 192 String txtFile = "data/abra.txt"; 193 String outFile = "/tmp/abra.txt"; 194 String binFile = "/tmp/abra.bin"; 195 196 //args = new String[] { "-" }; BinaryStdIn.fromFile (txtFile); BinaryStdOut.toFile (binFile); 197 //args = new String[] { "+" }; BinaryStdIn.fromFile (binFile); BinaryStdOut.toFile (outFile); 198 199 //args = new String[] { "-" }; binaryIn = new BinaryIn(txtFile); binaryOut = new BinaryOut(binFile); 200 //args = new String[] { "+" }; binaryIn = new BinaryIn(binFile); binaryOut = new BinaryOut(outFile); 201 202 if (args[0].equals("-")) compress(); 203 else if (args[0].equals("+")) expand(); 204 else throw new Error("Illegal command line argument"); 205 } 206 207}