001package algs31;
002import stdlib.*;
003import algs32.BST;
004import algs33.RedBlackBST;
005import algs34.LinearProbingHashST;
006import algs34.SeparateChainingHashST;
007import algs35.ST;
008/* ***********************************************************************
009 *  Compilation:  javac FrequencyCounter.java
010 *  Execution:    java FrequencyCounter L < input.txt
011 *  Dependencies: ST.java StdIn.java StdOut.java
012 *  Data files:   http://algs4.cs.princeton.edu/41elementary/tnyTale.txt
013 *                http://algs4.cs.princeton.edu/41elementary/tale.txt
014 *                http://algs4.cs.princeton.edu/41elementary/tale.txt
015 *                http://algs4.cs.princeton.edu/41elementary/leipzig100K.txt
016 *                http://algs4.cs.princeton.edu/41elementary/leipzig300K.txt
017 *                http://algs4.cs.princeton.edu/41elementary/leipzig1M.txt
018 *
019 *  Read in a list of words from standard input and print out
020 *  the most frequently occurring word.
021 *
022 *  % java FrequencyCounter 1 < tinyTale.txt
023 *  it 10
024 *
025 *  % java FrequencyCounter 8 < tale.txt
026 *  business 122
027 *
028 *  % java FrequencyCounter 10 < leipzig1M.txt
029 *  government 24763
030 *
031 *
032 *************************************************************************/
033/*
034-------------------------------------------------
035Results Leipzig, 15
036-------------------------------------------------
037Java TreeMap (ST)
038  put time: 17.078
039  get time: 0.023
040  most frequent word = over-the-counter (1067 times)
041  distinct = 22172
042  words    = 64647
043LinearProbingHashST
044  put time: 17.11
045  get time: 0.024
046SeparateChainingHashST
047  put time: 19.439
048  get time: 0.03
049BST
050  put time: 24.604
051  get time: 0.045
052RedBlackBST
053  put time: 25.651
054  get time: 0.045
055Ordered Array (BinarySearchST)
056  put time: 25.862
057  get time: 0.035
058Unordered Array (ArrayST)
059  put time: 54.04
060  get time: 7.073
061Unordered Linked List (SequentialSearchST)
062  put time: 71.719
063  get time: 11.849
064
065-------------------------------------------------
066Results Leipzig, 10
067-------------------------------------------------
068Java TreeMap (ST)
069  put time: 30.91
070  get time: 0.154
071  most frequent word = government (24763 times)
072  distinct =  165555
073  words    = 1610829
074LinearProbingHashST
075  put time: 25.752
076  get time: 0.075
077SeparateChainingHashST
078  put time: 27.603
079  get time: 0.123
080BST
081  put time: 31.053
082  get time: 0.248
083RedBlackBST
084  put time: 32.014
085  get time: 0.237
086Ordered Array (BinarySearchST)
087  put time: 153.086
088  get time: 0.252
089Unordered Array (ArrayST)
090  ...
091 */
092public class FrequencyCounter {
093
094        public static void main(String[] args) {
095                //int minlen = 2; String file = "data/tinyTale.txt";
096                int minlen = 6; String file = "data/tale.txt";
097                //int minlen = 12; String file = "data/leipzig1M.txt";
098                StdOut.println ("Java TreeMap (ST)"); testST(minlen, file);
099                //StdOut.println ("LinearProbingHashST"); testLinearProbingHashST(minlen, file);
100                //StdOut.println ("SeparateChainingHashST"); testSeparateChainingHashST(minlen, file);
101                //StdOut.println ("BST"); testBST(minlen, file);
102                //StdOut.println ("RedBlackBST"); testRedBlackBST(minlen, file);
103                //StdOut.println ("Ordered Array (BinarySearchST)"); testBinarySearchST(minlen, file);
104                //StdOut.println ("Unordered Array (ArrayST)"); testArrayST(minlen, file);
105                //StdOut.println ("Unordered Linked List (SequentialSearchST)"); testSequentialSearchST(minlen, file);
106        }
107
108        private static void testLinearProbingHashST (int minlen, String file) {
109                LinearProbingHashST<String, Integer> st = new LinearProbingHashST<>();
110
111                In in = new In (file);
112                Stopwatch sw = new Stopwatch();
113                while (!in.isEmpty()) {
114                        String key = in.readString();
115                        if (key.length() < minlen) continue;
116                        if (st.contains(key)) { st.put(key, st.get(key) + 1); }
117                        else                  { st.put(key, 1); }
118                }
119                StdOut.println("  put time: " + sw.elapsedTime ());
120
121                sw = new Stopwatch();
122                String max = "";
123                st.put(max, 0);
124                for (String word : st.keys()) {
125                        if (st.get(word) > st.get(max))
126                                max = word;
127                }
128                StdOut.println("  get time: " + sw.elapsedTime ());
129                StdOut.println("  most frequent word = " + max + " (" + st.get(max) + " times)");
130        }
131        private static void testSeparateChainingHashST (int minlen, String file) {
132                SeparateChainingHashST<String, Integer> st = new SeparateChainingHashST<>();
133
134                In in = new In (file);
135                Stopwatch sw = new Stopwatch();
136                while (!in.isEmpty()) {
137                        String key = in.readString();
138                        if (key.length() < minlen) continue;
139                        if (st.contains(key)) { st.put(key, st.get(key) + 1); }
140                        else                  { st.put(key, 1); }
141                }
142                StdOut.println("  put time: " + sw.elapsedTime ());
143
144                sw = new Stopwatch();
145                String max = "";
146                st.put(max, 0);
147                for (String word : st.keys()) {
148                        if (st.get(word) > st.get(max))
149                                max = word;
150                }
151                StdOut.println("  get time: " + sw.elapsedTime ());
152                StdOut.println("  most frequent word = " + max + " (" + st.get(max) + " times)");
153        }
154        private static void testArrayST (int minlen, String file) {
155                ArrayST<String, Integer> st = new ArrayST<>();
156
157                In in = new In (file);
158                Stopwatch sw = new Stopwatch();
159                while (!in.isEmpty()) {
160                        String key = in.readString();
161                        if (key.length() < minlen) continue;
162                        if (st.contains(key)) { st.put(key, st.get(key) + 1); }
163                        else                  { st.put(key, 1); }
164                }
165                StdOut.println("  put time: " + sw.elapsedTime ());
166
167                sw = new Stopwatch();
168                String max = "";
169                st.put(max, 0);
170                for (String word : st.keys()) {
171                        if (st.get(word) > st.get(max))
172                                max = word;
173                }
174                StdOut.println("  get time: " + sw.elapsedTime ());
175                StdOut.println("  most frequent word = " + max + " (" + st.get(max) + " times)");
176        }
177        private static void testRedBlackBST (int minlen, String file) {
178                RedBlackBST<String, Integer> st = new RedBlackBST<>();
179
180                In in = new In (file);
181                Stopwatch sw = new Stopwatch();
182                while (!in.isEmpty()) {
183                        String key = in.readString();
184                        if (key.length() < minlen) continue;
185                        if (st.contains(key)) { st.put(key, st.get(key) + 1); }
186                        else                  { st.put(key, 1); }
187                }
188                StdOut.println("  put time: " + sw.elapsedTime ());
189
190                sw = new Stopwatch();
191                String max = "";
192                st.put(max, 0);
193                for (String word : st.keys()) {
194                        if (st.get(word) > st.get(max))
195                                max = word;
196                }
197                StdOut.println("  get time: " + sw.elapsedTime ());
198                StdOut.println("  most frequent word = " + max + " (" + st.get(max) + " times)");
199        }
200        private static void testSequentialSearchST (int minlen, String file) {
201                SequentialSearchST<String, Integer> st = new SequentialSearchST<>();
202
203                In in = new In (file);
204                Stopwatch sw = new Stopwatch();
205                while (!in.isEmpty()) {
206                        String key = in.readString();
207                        if (key.length() < minlen) continue;
208                        if (st.contains(key)) { st.put(key, st.get(key) + 1); }
209                        else                  { st.put(key, 1); }
210                }
211                StdOut.println("  put time: " + sw.elapsedTime ());
212
213                sw = new Stopwatch();
214                String max = "";
215                st.put(max, 0);
216                for (String word : st.keys()) {
217                        if (st.get(word) > st.get(max))
218                                max = word;
219                }
220                StdOut.println("  get time: " + sw.elapsedTime ());
221                StdOut.println("  most frequent word = " + max + " (" + st.get(max) + " times)");
222        }
223        private static void testBST (int minlen, String file) {
224                BST<String, Integer> st = new BST<>();
225
226                In in = new In (file);
227                Stopwatch sw = new Stopwatch();
228                while (!in.isEmpty()) {
229                        String key = in.readString();
230                        if (key.length() < minlen) continue;
231                        if (st.contains(key)) { st.put(key, st.get(key) + 1); }
232                        else                  { st.put(key, 1); }
233                }
234                StdOut.println("  put time: " + sw.elapsedTime ());
235
236                sw = new Stopwatch();
237                String max = "";
238                st.put(max, 0);
239                for (String word : st.keys()) {
240                        if (st.get(word) > st.get(max))
241                                max = word;
242                }
243                StdOut.println("  get time: " + sw.elapsedTime ());
244                StdOut.println("  most frequent word = " + max + " (" + st.get(max) + " times)");
245        }
246        private static void testBinarySearchST (int minlen, String file) {
247                BinarySearchST<String, Integer> st = new BinarySearchST<>();
248
249                In in = new In (file);
250                Stopwatch sw = new Stopwatch();
251                while (!in.isEmpty()) {
252                        String key = in.readString();
253                        if (key.length() < minlen) continue;
254                        if (st.contains(key)) { st.put(key, st.get(key) + 1); }
255                        else                  { st.put(key, 1); }
256                }
257                StdOut.println("  put time: " + sw.elapsedTime ());
258
259                sw = new Stopwatch();
260                String max = "";
261                st.put(max, 0);
262                for (String word : st.keys()) {
263                        if (st.get(word) > st.get(max))
264                                max = word;
265                }
266                StdOut.println("  get time: " + sw.elapsedTime ());
267                StdOut.println("  most frequent word = " + max + " (" + st.get(max) + " times)");
268        }
269        private static void testST (int minlen, String file) {
270                int words = 0;
271                In in = new In (file);
272                ST<String, Integer> st = new ST<>();
273
274                // compute frequency counts
275                Stopwatch sw = new Stopwatch();
276                while (!in.isEmpty()) {
277                        String key = in.readString();
278                        if (key.length() < minlen) continue;
279                        words++;
280                        if (st.contains(key)) { st.put(key, st.get(key) + 1); }
281                        else                  { st.put(key, 1); }
282                }
283                StdOut.println("  put time: " + sw.elapsedTime ());
284
285                // find a key with the highest frequency count
286                sw = new Stopwatch();
287                String max = "";
288                st.put(max, 0);
289                for (String word : st.keys()) {
290                        if (st.get(word) > st.get(max))
291                                max = word;
292                }
293                StdOut.println("  get time: " + sw.elapsedTime ());
294
295                StdOut.println("  most frequent word = " + max + " (" + st.get(max) + " times)");
296                StdOut.println("  distinct = " + (st.size ()-1));
297                StdOut.println("  words  = " + words);
298        }
299
300
301}