001package algs55; // section 5.5
002import stdlib.*;
003import algs24.MinPQ;
004/* ***********************************************************************
005 *  Compilation:  javac Huffman.java
006 *  Execution:    java Huffman - < input.txt   (compress)
007 *  Execution:    java Huffman + < input.txt   (expand)
008 *  Dependencies: BinaryIn.java BinaryOut.java
009 *  Data files:   http://algs4.cs.princeton.edu/55compression/abra.txt
010 *                http://algs4.cs.princeton.edu/55compression/tinytinyTale.txt
011 *
012 *  Compress or expand a binary input stream using the Huffman algorithm.
013 *
014 *  % java Huffman - < abra.txt | java BinaryDump 60
015 *  010100000100101000100010010000110100001101010100101010000100
016 *  000000000000000000000000000110001111100101101000111110010100
017 *  120 bits
018 *
019 *  % java Huffman - < abra.txt | java Huffman +
020 *  ABRACADABRA!
021 *
022 *************************************************************************/
023
024public class Huffman {
025        private static BinaryIn binaryIn;
026        private static BinaryOut binaryOut;
027
028        // alphabet size of extended ASCII
029        private static final int R = 256;
030
031        // Huffman trie node
032        private static class Node implements Comparable<Node> {
033                public final char ch;
034                public final int freq;
035                public final Node left, right;
036
037                public Node(char ch, int freq, Node left, Node right) {
038                        this.ch    = ch;
039                        this.freq  = freq;
040                        this.left  = left;
041                        this.right = right;
042                }
043
044                // is the node a leaf node?
045                private boolean isLeaf() {
046                        assert (left == null && right == null) || (left != null && right != null);
047                        return (left == null && right == null);
048                }
049
050                // compare, based on frequency
051                public int compareTo(Node that) {
052                        return this.freq - that.freq;
053                }
054        }
055
056
057        // compress bytes from standard input and write to standard output
058        public static void compress() {
059                // read the input
060                String s = binaryIn.readString();
061                char[] input = s.toCharArray();
062
063                // tabulate frequency counts
064                int[] freq = new int[R];
065                for (int i = 0; i < input.length; i++)
066                        freq[input[i]]++;
067
068                // build Huffman trie
069                Node root = buildTrie(freq);
070                writeTrie(root);
071                printTrie (root);
072
073                // build code table
074                String[] st = new String[R];
075                buildCode(st, root, "");
076
077                // print number of bytes in original uncompressed message
078                binaryOut.write(input.length);
079                StdOut.print ("Length: " + input.length);
080
081                // use Huffman code to encode input
082                for (int i = 0; i < input.length; i++) {
083                        String code = st[input[i]];
084                        for (int j = 0; j < code.length(); j++) {
085                                if (code.charAt(j) == '0') {
086                                        binaryOut.write(false);
087                                }
088                                else if (code.charAt(j) == '1') {
089                                        binaryOut.write(true);
090                                }
091                                else throw new IllegalStateException("Illegal state");
092                        }
093                }
094
095                // close output stream
096                binaryOut.close();
097        }
098
099        // expand Huffman-encoded input from standard input and write to standard output
100        public static void expand() {
101
102                // read in Huffman trie from input stream
103                Node root = readTrie();
104                printTrie (root);
105
106                // number of bytes to write
107                int length = binaryIn.readInt();
108
109                StdOut.print ("Length: " + length);
110                // decode using the Huffman trie
111                for (int i = 0; i < length; i++) {
112                        Node x = root;
113                        while (!x.isLeaf()) {
114                                boolean bit = binaryIn.readBoolean();
115                                if (bit) x = x.right;
116                                else     x = x.left;
117                        }
118                        binaryOut.write(x.ch);
119                }
120                binaryOut.close();
121        }
122
123        // build the Huffman trie given frequencies
124        private static Node buildTrie(int[] freq) {
125
126                // initialze priority queue with singleton trees
127                MinPQ<Node> pq = new MinPQ<>();
128                for (char i = 0; i < R; i++)
129                        if (freq[i] > 0)
130                                pq.insert(new Node(i, freq[i], null, null));
131
132                // merge two smallest trees
133                while (pq.size() > 1) {
134                        Node left  = pq.delMin();
135                        Node right = pq.delMin();
136                        Node parent = new Node('\0', left.freq + right.freq, left, right);
137                        pq.insert(parent);
138                }
139                return pq.delMin();
140        }
141
142
143        // make a lookup table from symbols and their encodings
144        private static void buildCode(String[] st, Node x, String s) {
145                if (!x.isLeaf()) {
146                        buildCode(st, x.left,  s + '0');
147                        buildCode(st, x.right, s + '1');
148                }
149                else {
150                        st[x.ch] = s;
151                }
152        }
153
154        // write bitstring-encoded trie to standard output
155        private static void writeTrie(Node x) {
156                //StdOut.println ("wrote " + x.isLeaf ());
157                if (x.isLeaf()) {
158                        //StdOut.println ("wrote " + x.ch);
159                        binaryOut.write(true);
160                        binaryOut.write(x.ch, 8);
161                } else {
162                        binaryOut.write(false);
163                        writeTrie(x.left);
164                        writeTrie(x.right);
165                }
166        }
167
168        private static Node readTrie() {
169                boolean isLeaf = binaryIn.readBoolean();
170                //StdOut.println ("read " + isLeaf);
171                if (isLeaf) {
172                        char ch = binaryIn.readChar(8);
173                        //StdOut.println ("read " + ch);
174                        return new Node(ch, -1, null, null);
175                } else {
176                        Node left = readTrie();
177                        Node right = readTrie();
178                        return new Node('\0', -1, left, right);
179                }
180        }
181        private static void printTrie(Node x) { printTrie(x, ""); }
182        private static void printTrie(Node x, String pre) {
183                if (x.isLeaf()) {
184                        StdOut.format ("%c %s\n", x.ch, pre);
185                }
186                if (x.left!=null)  printTrie(x.left, pre + "0");
187                if (x.right!=null) printTrie(x.right, pre + "1");
188        }
189
190        // seems to be broken...
191        public static void main(String[] args) {
192                String txtFile = "data/abra.txt";
193                String outFile = "/tmp/abra.txt";
194                String binFile = "/tmp/abra.bin";
195
196                //args = new String[] { "-" }; BinaryStdIn.fromFile (txtFile); BinaryStdOut.toFile (binFile);
197                //args = new String[] { "+" }; BinaryStdIn.fromFile (binFile); BinaryStdOut.toFile (outFile);
198
199                //args = new String[] { "-" }; binaryIn = new BinaryIn(txtFile); binaryOut = new BinaryOut(binFile);
200                //args = new String[] { "+" }; binaryIn = new BinaryIn(binFile); binaryOut = new BinaryOut(outFile);
201
202                if      (args[0].equals("-")) compress();
203                else if (args[0].equals("+")) expand();
204                else throw new Error("Illegal command line argument");
205        }
206
207}