001package algs31; 002import stdlib.*; 003import algs32.BST; 004import algs33.RedBlackBST; 005import algs34.LinearProbingHashST; 006import algs34.SeparateChainingHashST; 007import algs35.ST; 008/* *********************************************************************** 009 * Compilation: javac FrequencyCounter.java 010 * Execution: java FrequencyCounter L < input.txt 011 * Dependencies: ST.java StdIn.java StdOut.java 012 * Data files: http://algs4.cs.princeton.edu/41elementary/tnyTale.txt 013 * http://algs4.cs.princeton.edu/41elementary/tale.txt 014 * http://algs4.cs.princeton.edu/41elementary/tale.txt 015 * http://algs4.cs.princeton.edu/41elementary/leipzig100K.txt 016 * http://algs4.cs.princeton.edu/41elementary/leipzig300K.txt 017 * http://algs4.cs.princeton.edu/41elementary/leipzig1M.txt 018 * 019 * Read in a list of words from standard input and print out 020 * the most frequently occurring word. 021 * 022 * % java FrequencyCounter 1 < tinyTale.txt 023 * it 10 024 * 025 * % java FrequencyCounter 8 < tale.txt 026 * business 122 027 * 028 * % java FrequencyCounter 10 < leipzig1M.txt 029 * government 24763 030 * 031 * 032 *************************************************************************/ 033/* 034------------------------------------------------- 035Results Leipzig, 15 036------------------------------------------------- 037Java TreeMap (ST) 038 put time: 17.078 039 get time: 0.023 040 most frequent word = over-the-counter (1067 times) 041 distinct = 22172 042 words = 64647 043LinearProbingHashST 044 put time: 17.11 045 get time: 0.024 046SeparateChainingHashST 047 put time: 19.439 048 get time: 0.03 049BST 050 put time: 24.604 051 get time: 0.045 052RedBlackBST 053 put time: 25.651 054 get time: 0.045 055Ordered Array (BinarySearchST) 056 put time: 25.862 057 get time: 0.035 058Unordered Array (ArrayST) 059 put time: 54.04 060 get time: 7.073 061Unordered Linked List (SequentialSearchST) 062 put time: 71.719 063 get time: 11.849 064 065------------------------------------------------- 066Results Leipzig, 10 067------------------------------------------------- 068Java TreeMap (ST) 069 put time: 30.91 070 get time: 0.154 071 most frequent word = government (24763 times) 072 distinct = 165555 073 words = 1610829 074LinearProbingHashST 075 put time: 25.752 076 get time: 0.075 077SeparateChainingHashST 078 put time: 27.603 079 get time: 0.123 080BST 081 put time: 31.053 082 get time: 0.248 083RedBlackBST 084 put time: 32.014 085 get time: 0.237 086Ordered Array (BinarySearchST) 087 put time: 153.086 088 get time: 0.252 089Unordered Array (ArrayST) 090 ... 091 */ 092public class FrequencyCounter { 093 094 public static void main(String[] args) { 095 //int minlen = 2; String file = "data/tinyTale.txt"; 096 int minlen = 6; String file = "data/tale.txt"; 097 //int minlen = 12; String file = "data/leipzig1M.txt"; 098 StdOut.println ("Java TreeMap (ST)"); testST(minlen, file); 099 //StdOut.println ("LinearProbingHashST"); testLinearProbingHashST(minlen, file); 100 //StdOut.println ("SeparateChainingHashST"); testSeparateChainingHashST(minlen, file); 101 //StdOut.println ("BST"); testBST(minlen, file); 102 //StdOut.println ("RedBlackBST"); testRedBlackBST(minlen, file); 103 //StdOut.println ("Ordered Array (BinarySearchST)"); testBinarySearchST(minlen, file); 104 //StdOut.println ("Unordered Array (ArrayST)"); testArrayST(minlen, file); 105 //StdOut.println ("Unordered Linked List (SequentialSearchST)"); testSequentialSearchST(minlen, file); 106 } 107 108 private static void testLinearProbingHashST (int minlen, String file) { 109 LinearProbingHashST<String, Integer> st = new LinearProbingHashST<>(); 110 111 In in = new In (file); 112 Stopwatch sw = new Stopwatch(); 113 while (!in.isEmpty()) { 114 String key = in.readString(); 115 if (key.length() < minlen) continue; 116 if (st.contains(key)) { st.put(key, st.get(key) + 1); } 117 else { st.put(key, 1); } 118 } 119 StdOut.println(" put time: " + sw.elapsedTime ()); 120 121 sw = new Stopwatch(); 122 String max = ""; 123 st.put(max, 0); 124 for (String word : st.keys()) { 125 if (st.get(word) > st.get(max)) 126 max = word; 127 } 128 StdOut.println(" get time: " + sw.elapsedTime ()); 129 StdOut.println(" most frequent word = " + max + " (" + st.get(max) + " times)"); 130 } 131 private static void testSeparateChainingHashST (int minlen, String file) { 132 SeparateChainingHashST<String, Integer> st = new SeparateChainingHashST<>(); 133 134 In in = new In (file); 135 Stopwatch sw = new Stopwatch(); 136 while (!in.isEmpty()) { 137 String key = in.readString(); 138 if (key.length() < minlen) continue; 139 if (st.contains(key)) { st.put(key, st.get(key) + 1); } 140 else { st.put(key, 1); } 141 } 142 StdOut.println(" put time: " + sw.elapsedTime ()); 143 144 sw = new Stopwatch(); 145 String max = ""; 146 st.put(max, 0); 147 for (String word : st.keys()) { 148 if (st.get(word) > st.get(max)) 149 max = word; 150 } 151 StdOut.println(" get time: " + sw.elapsedTime ()); 152 StdOut.println(" most frequent word = " + max + " (" + st.get(max) + " times)"); 153 } 154 private static void testArrayST (int minlen, String file) { 155 ArrayST<String, Integer> st = new ArrayST<>(); 156 157 In in = new In (file); 158 Stopwatch sw = new Stopwatch(); 159 while (!in.isEmpty()) { 160 String key = in.readString(); 161 if (key.length() < minlen) continue; 162 if (st.contains(key)) { st.put(key, st.get(key) + 1); } 163 else { st.put(key, 1); } 164 } 165 StdOut.println(" put time: " + sw.elapsedTime ()); 166 167 sw = new Stopwatch(); 168 String max = ""; 169 st.put(max, 0); 170 for (String word : st.keys()) { 171 if (st.get(word) > st.get(max)) 172 max = word; 173 } 174 StdOut.println(" get time: " + sw.elapsedTime ()); 175 StdOut.println(" most frequent word = " + max + " (" + st.get(max) + " times)"); 176 } 177 private static void testRedBlackBST (int minlen, String file) { 178 RedBlackBST<String, Integer> st = new RedBlackBST<>(); 179 180 In in = new In (file); 181 Stopwatch sw = new Stopwatch(); 182 while (!in.isEmpty()) { 183 String key = in.readString(); 184 if (key.length() < minlen) continue; 185 if (st.contains(key)) { st.put(key, st.get(key) + 1); } 186 else { st.put(key, 1); } 187 } 188 StdOut.println(" put time: " + sw.elapsedTime ()); 189 190 sw = new Stopwatch(); 191 String max = ""; 192 st.put(max, 0); 193 for (String word : st.keys()) { 194 if (st.get(word) > st.get(max)) 195 max = word; 196 } 197 StdOut.println(" get time: " + sw.elapsedTime ()); 198 StdOut.println(" most frequent word = " + max + " (" + st.get(max) + " times)"); 199 } 200 private static void testSequentialSearchST (int minlen, String file) { 201 SequentialSearchST<String, Integer> st = new SequentialSearchST<>(); 202 203 In in = new In (file); 204 Stopwatch sw = new Stopwatch(); 205 while (!in.isEmpty()) { 206 String key = in.readString(); 207 if (key.length() < minlen) continue; 208 if (st.contains(key)) { st.put(key, st.get(key) + 1); } 209 else { st.put(key, 1); } 210 } 211 StdOut.println(" put time: " + sw.elapsedTime ()); 212 213 sw = new Stopwatch(); 214 String max = ""; 215 st.put(max, 0); 216 for (String word : st.keys()) { 217 if (st.get(word) > st.get(max)) 218 max = word; 219 } 220 StdOut.println(" get time: " + sw.elapsedTime ()); 221 StdOut.println(" most frequent word = " + max + " (" + st.get(max) + " times)"); 222 } 223 private static void testBST (int minlen, String file) { 224 BST<String, Integer> st = new BST<>(); 225 226 In in = new In (file); 227 Stopwatch sw = new Stopwatch(); 228 while (!in.isEmpty()) { 229 String key = in.readString(); 230 if (key.length() < minlen) continue; 231 if (st.contains(key)) { st.put(key, st.get(key) + 1); } 232 else { st.put(key, 1); } 233 } 234 StdOut.println(" put time: " + sw.elapsedTime ()); 235 236 sw = new Stopwatch(); 237 String max = ""; 238 st.put(max, 0); 239 for (String word : st.keys()) { 240 if (st.get(word) > st.get(max)) 241 max = word; 242 } 243 StdOut.println(" get time: " + sw.elapsedTime ()); 244 StdOut.println(" most frequent word = " + max + " (" + st.get(max) + " times)"); 245 } 246 private static void testBinarySearchST (int minlen, String file) { 247 BinarySearchST<String, Integer> st = new BinarySearchST<>(); 248 249 In in = new In (file); 250 Stopwatch sw = new Stopwatch(); 251 while (!in.isEmpty()) { 252 String key = in.readString(); 253 if (key.length() < minlen) continue; 254 if (st.contains(key)) { st.put(key, st.get(key) + 1); } 255 else { st.put(key, 1); } 256 } 257 StdOut.println(" put time: " + sw.elapsedTime ()); 258 259 sw = new Stopwatch(); 260 String max = ""; 261 st.put(max, 0); 262 for (String word : st.keys()) { 263 if (st.get(word) > st.get(max)) 264 max = word; 265 } 266 StdOut.println(" get time: " + sw.elapsedTime ()); 267 StdOut.println(" most frequent word = " + max + " (" + st.get(max) + " times)"); 268 } 269 private static void testST (int minlen, String file) { 270 int words = 0; 271 In in = new In (file); 272 ST<String, Integer> st = new ST<>(); 273 274 // compute frequency counts 275 Stopwatch sw = new Stopwatch(); 276 while (!in.isEmpty()) { 277 String key = in.readString(); 278 if (key.length() < minlen) continue; 279 words++; 280 if (st.contains(key)) { st.put(key, st.get(key) + 1); } 281 else { st.put(key, 1); } 282 } 283 StdOut.println(" put time: " + sw.elapsedTime ()); 284 285 // find a key with the highest frequency count 286 sw = new Stopwatch(); 287 String max = ""; 288 st.put(max, 0); 289 for (String word : st.keys()) { 290 if (st.get(word) > st.get(max)) 291 max = word; 292 } 293 StdOut.println(" get time: " + sw.elapsedTime ()); 294 295 StdOut.println(" most frequent word = " + max + " (" + st.get(max) + " times)"); 296 StdOut.println(" distinct = " + (st.size ()-1)); 297 StdOut.println(" words = " + words); 298 } 299 300 301}